-
Notifications
You must be signed in to change notification settings - Fork 845
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Test traffic Merge Into Dev: Request for Feedback #2220
Changes from 4 commits
34817a6
a7c1dce
a78aab4
f48a539
3d38b66
e00efb2
6479ff2
70009c2
db54fea
1f4a4ed
3be2781
7341b20
1a35096
4076c70
3deecc4
f6740a8
3a34013
e581410
a90bf5e
64df9de
9c40282
0dc4fab
2325b80
62622bd
5ac8132
fac0134
a549bcf
47c8e3a
4fd183f
3b73118
440e97e
46c0908
8771742
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3348,3 +3348,31 @@ class PullRequestReviewMessageRef(Base): | |
msg = relationship("Message") | ||
pr_review = relationship("PullRequestReview") | ||
repo = relationship("Repo") | ||
|
||
|
||
class RepoClone(Base):
    """
    Historical record of GitHub clone-traffic counts for a repository.

    Like ``releases`` / ``repo_info``, this table is append-only: the
    autoincrement surrogate key means repeated collections for the same
    repo add new rows rather than conflicting, preserving history.
    """

    __tablename__ = "repo_clones_data"
    __table_args__ = {"schema": "augur_data"}

    # Surrogate primary key backed by a Postgres sequence.
    repo_clone_data_id = Column(
        BigInteger,
        primary_key=True,
        server_default=text(
            "nextval('augur_data.repo_clones_data_id_seq'::regclass)"
        ),
    )
    # FK to the owning repo; DEFERRED so parent and child rows can be
    # inserted in the same transaction during bulk loads.
    repo_id = Column(
        ForeignKey(
            "augur_data.repo.repo_id",
            ondelete="RESTRICT",
            onupdate="CASCADE",
            deferrable=True,
            initially="DEFERRED",
        ),
        nullable=False,
    )
    # presumably "unique cloners" vs "total clone events" as reported by
    # the GitHub traffic API — TODO confirm against the collector.
    unique_clones = Column(BigInteger)
    count_clones = Column(BigInteger)
    clone_data_timestamp = Column(TIMESTAMP(precision=6))

    repo = relationship("Repo")
sgoggins marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
"""traffic additions | ||
|
||
Revision ID: 3 | ||
Revises: 2 | ||
Create Date: 2022-12-30 19:23:17.997570 | ||
|
||
""" | ||
from alembic import op | ||
import sqlalchemy as sa | ||
from sqlalchemy.dialects import postgresql | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = '3' | ||
down_revision = '2' | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade():
    """Apply revision 3: create the repo clone-traffic table and related DDL."""

    add_repo_clone_data_table_1()
def downgrade():
    """Revert revision 3: drop the repo clone-traffic table and related DDL."""

    # Pass the flag directly; the original bound a local named `upgrade`,
    # which shadowed the module-level upgrade() function.
    add_repo_clone_data_table_1(upgrade=False)
def add_repo_clone_data_table_1(upgrade=True):
    """Create (or drop, when ``upgrade`` is False) augur_data.repo_clones_data.

    Also re-points the user_repos -> repo foreign key at the augur_data
    schema and narrows releases.release_id from CHAR(256) to CHAR(128)
    (both reverted on downgrade).

    Args:
        upgrade: True applies the changes, False reverts them.
    """

    if upgrade:
        op.create_table('repo_clones_data',
            sa.Column('repo_clone_data_id', sa.BigInteger(),
                      server_default=sa.text("nextval('augur_data.repo_clones_data_id_seq'::regclass)"),
                      nullable=False),
            sa.Column('repo_id', sa.BigInteger(), nullable=False),
            sa.Column('unique_clones', sa.BigInteger(), nullable=True),
            sa.Column('count_clones', sa.BigInteger(), nullable=True),
            sa.Column('clone_data_timestamp', postgresql.TIMESTAMP(precision=6), nullable=True),
            sa.ForeignKeyConstraint(['repo_id'], ['augur_data.repo.repo_id'],
                                    onupdate='CASCADE', ondelete='RESTRICT',
                                    initially='DEFERRED', deferrable=True),
            sa.PrimaryKeyConstraint('repo_clone_data_id'),
            schema='augur_data'
        )
        op.alter_column('releases', 'release_id',
            existing_type=sa.CHAR(length=256),
            type_=sa.CHAR(length=128),
            existing_nullable=False,
            existing_server_default=sa.text('nextval(\'"augur_data".releases_release_id_seq\'::regclass)'),
            schema='augur_data')
        op.drop_constraint('user_repos_repo_id_fkey', 'user_repos',
                           schema='augur_operations', type_='foreignkey')
        # Name the replacement FK explicitly so downgrade() can drop it.
        # Passing None here left the constraint auto-named, and the matching
        # drop_constraint(None, ...) in the downgrade path would fail —
        # Alembic requires a constraint name to drop.
        op.create_foreign_key('user_repos_repo_id_fkey', 'user_repos', 'repo',
                              ['repo_id'], ['repo_id'],
                              source_schema='augur_operations',
                              referent_schema='augur_data')

    else:
        op.drop_constraint('user_repos_repo_id_fkey', 'user_repos',
                           schema='augur_operations', type_='foreignkey')
        op.create_foreign_key('user_repos_repo_id_fkey', 'user_repos', 'repo',
                              ['repo_id'], ['repo_id'],
                              source_schema='augur_operations')
        op.alter_column('releases', 'release_id',
            existing_type=sa.CHAR(length=128),
            type_=sa.CHAR(length=256),
            existing_nullable=False,
            existing_server_default=sa.text('nextval(\'"augur_data".releases_release_id_seq\'::regclass)'),
            schema='augur_data')
        op.drop_table('repo_clones_data', schema='augur_data')
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2777,6 +2777,35 @@ CREATE TABLE augur_data.working_commits ( | |
|
||
ALTER TABLE augur_data.working_commits OWNER TO augur; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ABrain7710 / @IsaacMilarky : Is this the right way to do this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The proper way to do this is with alembic which you did already. I would not do it this way. |
||
--
-- Name: repo_clones_data_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur
--

CREATE SEQUENCE augur_data.repo_clones_data_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;


ALTER TABLE augur_data.repo_clones_data_id_seq OWNER TO augur;

--
-- Name: repo_clones_data; Type: TABLE; Schema: augur_data; Owner: augur
--

-- Column types are aligned with the SQLAlchemy model and alembic
-- revision 3: bigint nullable counters and a microsecond-precision
-- timestamp (the earlier integer NOT NULL / timestamp(0) definition
-- disagreed with both).
CREATE TABLE augur_data.repo_clones_data (
    repo_clone_data_id bigint DEFAULT nextval('augur_data.repo_clones_data_id_seq'::regclass) NOT NULL,
    repo_id bigint NOT NULL,
    unique_clones bigint,
    count_clones bigint,
    clone_data_timestamp timestamp(6) without time zone
);


ALTER TABLE augur_data.repo_clones_data OWNER TO augur;
|
||
-- | ||
-- Name: affiliations_corp_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur | ||
-- | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import time | ||
import logging | ||
|
||
from augur.tasks.init.celery_app import celery_app as celery, engine | ||
from augur.application.db.data_parse import * | ||
from augur.tasks.github.util.github_paginator import GithubPaginator | ||
from augur.tasks.github.util.github_task_session import GithubTaskSession | ||
from augur.tasks.util.worker_util import remove_duplicate_dicts | ||
from augur.tasks.github.util.util import get_owner_repo | ||
from augur.application.db.models import RepoClone, Repo | ||
from augur.application.db.util import execute_session_query | ||
|
||
@celery.task
def collect_github_repo_clones_data(repo_git: str) -> None:
    """Fetch GitHub clone-traffic data for one repo and store it.

    Resolves ``repo_git`` to its repo_id, pulls the traffic/clones
    payload from the GitHub API, and inserts any clone history found.
    """

    logger = logging.getLogger(collect_github_repo_clones_data.__name__)

    # Resolve the repo row before hitting the API; the session is only
    # needed for this lookup.
    with GithubTaskSession(logger) as session:
        query = session.query(Repo).filter(Repo.repo_git == repo_git)
        repo_id = execute_session_query(query, 'one').repo_id

    owner, repo = get_owner_repo(repo_git)

    logger.info(f"Collecting Github repository clone data for {owner}/{repo}")

    clones_data = retrieve_all_clones_data(repo_git, logger)

    if not clones_data:
        logger.info(f"{owner}/{repo} has no clones")
        return

    process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id, logger)
||
def retrieve_all_clones_data(repo_git: str, logger):
    """Collect every page of GitHub clone-traffic data for ``repo_git``."""
    owner, repo = get_owner_repo(repo_git)

    url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones"

    # The session supplies the oauth keys the paginator authenticates with.
    with GithubTaskSession(logger, engine) as session:
        clones = GithubPaginator(url, session.oauths, logger)
        num_pages = clones.get_num_pages()

        all_data = []
        for page_data, page in clones.iter_pages():
            # None: the API has nothing more; empty: we walked off the
            # end of the data. Either way, stop with what we have.
            if page_data is None:
                break
            if not page_data:
                logger.debug(f"{repo.capitalize()} Traffic Page {page} contains no data...returning")
                logger.info(f"Traffic Page {page} of {num_pages}")
                break

            logger.info(f"{repo} Traffic Page {page} of {num_pages}")
            all_data.extend(page_data)

        return all_data
def process_clones_data(clones_data, task_name, repo_id, logger) -> None:
    """Deduplicate and insert the clone history records for one repo.

    Args:
        clones_data: API payload list; element 0's 'clones' key holds the
            per-period clone records.
        task_name: label used to prefix log messages.
        repo_id: augur repo_id the records belong to.
        logger: logger for progress messages.
    """
    clone_history_data = clones_data[0]['clones']

    clone_history_data_dicts = extract_needed_clone_history_data(clone_history_data, repo_id)

    with GithubTaskSession(logger, engine) as session:

        # Bug fix: the deduplicated list was computed but the raw
        # clone_history_data_dicts was what got logged and inserted.
        # Log and insert the deduped records instead.
        deduped_clone_history = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp')
        logger.info(f"{task_name}: Inserting {len(deduped_clone_history)} clone history records")

        # NOTE(review): the natural key here is only ['repo_id'] — since this
        # table is meant to keep a historical record per timestamp, this
        # presumably should include 'clone_data_timestamp'; confirm against
        # insert_data's conflict handling.
        session.insert_data(deduped_clone_history, RepoClone, ['repo_id'])
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{
    "Frontend": {
        "host": "ebay.chaoss.io",
        "port": 5000,
        "ssl": false
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@IsaacMilarky / @ABrain7710 : What needs fixing here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function LGTM unless it's throwing errors.