diff --git a/README.md b/README.md
index aa3c66bf53..c0bde4a5c5 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,6 @@ Augur is now releasing a dramatically improved new version to the main branch. I
 - The next release of the new version will include a hosted version of Augur where anyone can create an account and add repos “they care about”. If the hosted instance already has a requested organization or repository, it will be added to the user’s view. If it’s a new repository or organization, the user will be notified that collection will take time proportional to the scale of the repositories added.
 
 ## What is Augur?
-
 Augur is a software suite for collecting and measuring structured data about [free](https://www.fsf.org/about/) and [open-source](https://opensource.org/docs/osd) software (FOSS) communities.
diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py
index 541d0efc8b..b93a852ac5 100644
--- a/augur/application/db/data_parse.py
+++ b/augur/application/db/data_parse.py
@@ -466,8 +466,22 @@ def extract_needed_contributor_data(contributor, tool_source, tool_version, data
 
     return contributor
 
+def extract_needed_clone_history_data(clone_history_data: List[dict], repo_id: int):
+
+    if len(clone_history_data) == 0:
+        return []
+
+    clone_data_dicts = []
+    for clone in clone_history_data:
+
+        clone_data_dict = {
+            'repo_id': repo_id,
+            'clone_data_timestamp': clone['timestamp'],
+            'count_clones': clone['count'],
+            'unique_clones': clone['uniques']
+        }
+
+        clone_data_dicts.append(clone_data_dict)
+
+    return clone_data_dicts
 
 def extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_version, data_source):
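The new helper maps each entry of GitHub's clone-traffic payload onto the `repo_clones_data` columns one-to-one. A minimal sketch of the transformation, using made-up values shaped like the `clones` array of GitHub's `/traffic/clones` response (the `repo_id` is likewise illustrative):

```python
from augur.application.db.data_parse import extract_needed_clone_history_data

# Illustrative clone-history entries; the timestamps and counts are not real data.
clone_history_data = [
    {"timestamp": "2023-03-12T00:00:00Z", "count": 7, "uniques": 3},
    {"timestamp": "2023-03-13T00:00:00Z", "count": 2, "uniques": 2},
]

rows = extract_needed_clone_history_data(clone_history_data, repo_id=1)
# rows[0] == {'repo_id': 1, 'clone_data_timestamp': '2023-03-12T00:00:00Z',
#             'count_clones': 7, 'unique_clones': 3}
```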
diff --git a/augur/application/db/models/__init__.py b/augur/application/db/models/__init__.py
index 3941db76c0..2bf4683257 100644
--- a/augur/application/db/models/__init__.py
+++ b/augur/application/db/models/__init__.py
@@ -63,6 +63,7 @@
     PullRequestTeam,
     PullRequestRepo,
     PullRequestReviewMessageRef,
+    RepoClone,
 )
 
 from augur.application.db.models.spdx import (
diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py
index 3eb5c0ac85..6c5d8decae 100644
--- a/augur/application/db/models/augur_data.py
+++ b/augur/application/db/models/augur_data.py
@@ -3348,3 +3348,31 @@ class PullRequestReviewMessageRef(Base):
     msg = relationship("Message")
     pr_review = relationship("PullRequestReview")
     repo = relationship("Repo")
+
+
+class RepoClone(Base):
+    __tablename__ = "repo_clones_data"
+    __table_args__ = {"schema": "augur_data"}
+
+    repo_clone_data_id = Column(
+        BigInteger,
+        primary_key=True,
+        server_default=text(
+            "nextval('augur_data.repo_clones_data_id_seq'::regclass)"
+        ),
+    )
+    repo_id = Column(
+        ForeignKey(
+            "augur_data.repo.repo_id",
+            ondelete="RESTRICT",
+            onupdate="CASCADE",
+            deferrable=True,
+            initially="DEFERRED",
+        ),
+        nullable=False,
+    )
+    unique_clones = Column(BigInteger)
+    count_clones = Column(BigInteger)
+    clone_data_timestamp = Column(TIMESTAMP(precision=6))
+
+    repo = relationship("Repo")
\ No newline at end of file
diff --git a/augur/application/schema/alembic/versions/12_traffic_additions.py b/augur/application/schema/alembic/versions/12_traffic_additions.py
new file mode 100644
index 0000000000..bb1f3a08bb
--- /dev/null
+++ b/augur/application/schema/alembic/versions/12_traffic_additions.py
@@ -0,0 +1,70 @@
+"""traffic additions
+
+Revision ID: 12
+Revises: 11
+Create Date: 2022-12-30 19:23:17.997570
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+from sqlalchemy.schema import Sequence
+
+# revision identifiers, used by Alembic.
+revision = '12'
+down_revision = '11'
+branch_labels = None
+depends_on = None
+
+traffic_sequence = Sequence('repo_clones_data_id_seq', schema='augur_data')
+
+# Note: an earlier draft called op.execute(schema.CreateSequence(traffic_sequence))
+# and failed with "NameError: name 'schema' is not defined"; the calls below go
+# through sa.schema instead.
+
+def upgrade():
+
+    add_repo_clone_data_table_1()
+
+def downgrade():
+
+    add_repo_clone_data_table_1(upgrade=False)
+
+
+def add_repo_clone_data_table_1(upgrade=True):
+
+    if upgrade:
+
+        op.execute(sa.schema.CreateSequence(traffic_sequence))
+        op.create_table('repo_clones_data',
+            sa.Column('repo_clone_data_id', sa.BigInteger(), server_default=sa.text("nextval('augur_data.repo_clones_data_id_seq'::regclass)"), nullable=False),
+            sa.Column('repo_id', sa.BigInteger(), nullable=False),
+            sa.Column('unique_clones', sa.BigInteger(), nullable=True),
+            sa.Column('count_clones', sa.BigInteger(), nullable=True),
+            sa.Column('clone_data_timestamp', postgresql.TIMESTAMP(precision=6), nullable=True),
+            sa.ForeignKeyConstraint(['repo_id'], ['augur_data.repo.repo_id'], onupdate='CASCADE', ondelete='RESTRICT', initially='DEFERRED', deferrable=True),
+            sa.PrimaryKeyConstraint('repo_clone_data_id'),
+            schema='augur_data'
+        )
+
+        ### I don't think we want repo_id to be unique here. I think we want to have many entries per repo
+        # op.create_unique_constraint('repo_clone_unique', 'repo_clones_data', ['repo_id'])
+
+        # I do not think this is necessary, SPG, 3/19/2023
+        # op.drop_constraint('user_repo_user_id_fkey', 'user_repos', schema='augur_operations', type_='foreignkey')
+        # op.create_foreign_key(None, 'user_repos', 'repo', ['repo_id'], ['repo_id'], source_schema='augur_operations', referent_schema='augur_data')
+
+    else:
+
+        # I do not think this is necessary, SPG, 3/19/2023
+        # op.drop_constraint(None, 'user_repos', schema='augur_operations', type_='foreignkey')
+        # op.create_foreign_key('user_repos_repo_id_fkey', 'user_repos', 'repo', ['repo_id'], ['repo_id'], source_schema='augur_operations')
+        op.drop_table('repo_clones_data', schema='augur_data')
+        op.execute(sa.schema.DropSequence(traffic_sequence))
diff --git a/augur/application/schema/augur_full.sql b/augur/application/schema/augur_full.sql
index ca01dea878..6eef895cbe 100644
--- a/augur/application/schema/augur_full.sql
+++ b/augur/application/schema/augur_full.sql
@@ -2777,6 +2777,35 @@ CREATE TABLE augur_data.working_commits (
 
 ALTER TABLE augur_data.working_commits OWNER TO augur;
 
+--
+-- Name: repo_clones_data_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur
+--
+
+CREATE SEQUENCE augur_data.repo_clones_data_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+ALTER TABLE augur_data.repo_clones_data_id_seq OWNER TO augur;
+
+--
+-- Name: repo_clones_data; Type: TABLE; Schema: augur_data; Owner: augur
+--
+
+CREATE TABLE augur_data.repo_clones_data (
+    repo_clone_data_id bigint DEFAULT nextval('augur_data.repo_clones_data_id_seq'::regclass) NOT NULL,
+    repo_id bigint NOT NULL,
+    unique_clones bigint,
+    count_clones bigint,
+    clone_data_timestamp timestamp(6) without time zone
+);
+
+
+ALTER TABLE augur_data.repo_clones_data OWNER TO augur;
+
 --
 -- Name: affiliations_corp_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur
 --
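The `NameError` that the migration's inline note documents came from calling `schema.CreateSequence(...)` without ever importing `schema`; qualifying the constructor through the already-imported `sqlalchemy as sa` (or importing it from `sqlalchemy.schema`) resolves it. A standalone sketch of the DDL constructs involved, compiled against the PostgreSQL dialect without touching a database:

```python
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.schema import Sequence, DropSequence

traffic_sequence = Sequence('repo_clones_data_id_seq', schema='augur_data')

# Both spellings reference the same construct once sqlalchemy is imported as sa:
create_stmt = sa.schema.CreateSequence(traffic_sequence)
drop_stmt = DropSequence(traffic_sequence)

print(create_stmt.compile(dialect=postgresql.dialect()))
# CREATE SEQUENCE augur_data.repo_clones_data_id_seq
print(drop_stmt.compile(dialect=postgresql.dialect()))
# DROP SEQUENCE augur_data.repo_clones_data_id_seq
```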
diff --git a/augur/tasks/github/traffic/__init__.py b/augur/tasks/github/traffic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/augur/tasks/github/traffic/core.py b/augur/tasks/github/traffic/core.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/augur/tasks/github/traffic/tasks.py b/augur/tasks/github/traffic/tasks.py
new file mode 100644
index 0000000000..8b8652ec43
--- /dev/null
+++ b/augur/tasks/github/traffic/tasks.py
@@ -0,0 +1,76 @@
+import time
+import logging
+
+from augur.tasks.init.celery_app import celery_app as celery, engine
+from augur.application.db.data_parse import *
+from augur.tasks.github.util.github_paginator import GithubPaginator
+from augur.tasks.github.util.github_task_session import GithubTaskSession
+from augur.tasks.util.worker_util import remove_duplicate_dicts
+from augur.tasks.github.util.util import get_owner_repo
+from augur.application.db.models import RepoClone, Repo
+from augur.application.db.util import execute_session_query
+
+
+@celery.task
+def collect_github_repo_clones_data(repo_git: str) -> None:
+
+    logger = logging.getLogger(collect_github_repo_clones_data.__name__)
+
+    # use a GithubTaskSession to look up the repo whose clone data we will store
+    with GithubTaskSession(logger) as session:
+
+        query = session.query(Repo).filter(Repo.repo_git == repo_git)
+        repo_obj = execute_session_query(query, 'one')
+        repo_id = repo_obj.repo_id
+
+        owner, repo = get_owner_repo(repo_git)
+
+        logger.info(f"Collecting GitHub repository clone data for {owner}/{repo}")
+
+        clones_data = retrieve_all_clones_data(repo_git, logger)
+
+        if clones_data:
+            process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id, logger)
+        else:
+            logger.info(f"{owner}/{repo} has no clones")
+
+
+def retrieve_all_clones_data(repo_git: str, logger):
+
+    owner, repo = get_owner_repo(repo_git)
+
+    url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones"
+
+    # a GithubTaskSession handles insertions and stores the OAuth keys
+    with GithubTaskSession(logger, engine) as session:
+
+        clones = GithubPaginator(url, session.oauths, logger)
+
+        num_pages = clones.get_num_pages()
+        all_data = []
+        for page_data, page in clones.iter_pages():
+
+            if page_data is None:
+                return all_data
+
+            if len(page_data) == 0:
+                logger.debug(f"{repo.capitalize()} Traffic Page {page} contains no data...returning")
+                logger.info(f"Traffic Page {page} of {num_pages}")
+                return all_data
+
+            logger.info(f"{repo} Traffic Page {page} of {num_pages}")
+
+            all_data += page_data
+
+    return all_data
+
+
+def process_clones_data(clones_data, task_name, repo_id, logger) -> None:
+
+    clone_history_data = clones_data[0]['clones']
+
+    clone_history_data_dicts = extract_needed_clone_history_data(clone_history_data, repo_id)
+
+    with GithubTaskSession(logger, engine) as session:
+
+        # de-duplicate by timestamp before inserting
+        clone_history_data_dicts = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp')
+        logger.info(f"{task_name}: Inserting {len(clone_history_data_dicts)} clone history records")
+
+        session.insert_data(clone_history_data_dicts, RepoClone, ['repo_id'])
\ No newline at end of file
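`process_clones_data` indexes `clones_data[0]['clones']` because GitHub's `GET /repos/{owner}/{repo}/traffic/clones` endpoint returns a single object whose `clones` array carries the per-day history; the endpoint covers at most the trailing 14 days and requires push access to the repository. A sketch of that shape as a Python literal (the counts and timestamps are illustrative):

```python
# What GithubPaginator hands back as the lone element of page_data for the
# traffic/clones URL -- one summary object, not a list of per-day records.
clones_response = {
    "count": 173,      # total clones across the window
    "uniques": 128,    # unique cloners across the window
    "clones": [
        {"timestamp": "2023-03-12T00:00:00Z", "count": 7, "uniques": 3},
        {"timestamp": "2023-03-13T00:00:00Z", "count": 2, "uniques": 2},
    ],
}

# process_clones_data then feeds clones_response["clones"] through
# extract_needed_clone_history_data before de-duplicating and inserting.
```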
diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py
index 84f3622b0c..ccb957dc2c 100644
--- a/augur/tasks/init/celery_app.py
+++ b/augur/tasks/init/celery_app.py
@@ -46,7 +46,8 @@ class CollectionState(Enum):
                  'augur.tasks.github.repo_info.tasks',
                  'augur.tasks.github.detect_move.tasks',
                  'augur.tasks.github.pull_requests.files_model.tasks',
-                 'augur.tasks.github.pull_requests.commits_model.tasks']
+                 'augur.tasks.github.pull_requests.commits_model.tasks',
+                 'augur.tasks.github.traffic.tasks']
 
 git_tasks = ['augur.tasks.git.facade_tasks',
              'augur.tasks.git.dependency_tasks.tasks',
@@ -91,7 +92,7 @@ def augur_handle_task_failure(self,exc,task_id,repo_git,logger_name,collection_h
     #Only set to error if the repo was actually running at the time.
     #This is to allow for things like exiting from collection without error.
     #i.e. detect_repo_move changes the repo's repo_git and resets collection to pending without error
-    prevStatus = getattr(repoS, f"{collection_hook}_status")
+    prevStatus = getattr(repoStatus, f"{collection_hook}_status")
 
     if prevStatus == CollectionState.COLLECTING.value or prevStatus == CollectionState.INITIALIZING.value:
         setattr(repoStatus, f"{collection_hook}_status", CollectionState.ERROR.value)
diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py
index edddcaa11a..12b0859224 100644
--- a/augur/tasks/start_tasks.py
+++ b/augur/tasks/start_tasks.py
@@ -22,7 +22,7 @@
 from augur.tasks.github.pull_requests.files_model.tasks import process_pull_request_files
 from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits
 from augur.tasks.git.dependency_tasks.tasks import process_ossf_scorecard_metrics
-
+from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data
 from augur.tasks.git.facade_tasks import *
 from augur.tasks.db.refresh_materialized_views import *
 # from augur.tasks.data_analysis import *
@@ -74,6 +74,36 @@ def primary_repo_collect_phase(repo_git):
     #A chain is needed for each repo.
     repo_info_task = collect_repo_info.si(repo_git)#collection_task_wrapper(self)
 
+## I think this section is outdated
+# ### Section from traffic metric merge that may need to be changed
+
+# with DatabaseSession(logger) as session:
+#     query = session.query(Repo)
+#     repos = execute_session_query(query, 'all')
+
+#     #Just use list comprehension for simple group
+#     repo_info_tasks = [collect_repo_info.si(repo.repo_git) for repo in repos]
+
+#     for repo in repos:
+#         first_tasks_repo = group(collect_issues.si(repo.repo_git), collect_pull_requests.si(repo.repo_git), collect_github_repo_clones_data.si(repo.repo_git))
+#         second_tasks_repo = group(collect_events.si(repo.repo_git),
+#             collect_github_messages.si(repo.repo_git), process_pull_request_files.si(repo.repo_git), process_pull_request_commits.si(repo.repo_git))
+
+#         repo_chain = chain(first_tasks_repo, second_tasks_repo)
+#         issue_dependent_tasks.append(repo_chain)
+
+#     repo_task_group = group(
+#         *repo_info_tasks,
+#         chain(group(*issue_dependent_tasks), process_contributors.si()),
+#         generate_facade_chain(logger),
+#         collect_releases.si()
+#     )
+
+#     chain(repo_task_group, refresh_materialized_views.si()).apply_async()
+
+# #### End of section from traffic metric merge that may need to be changed
+
+
     primary_repo_jobs = group(
         collect_issues.si(repo_git),
         collect_pull_requests.si(repo_git)
     )
@@ -81,7 +111,8 @@
     secondary_repo_jobs = group(
         collect_events.si(repo_git),#*create_grouped_task_load(dataList=first_pass, task=collect_events).tasks,
-        collect_github_messages.si(repo_git),#*create_grouped_task_load(dataList=first_pass,task=collect_github_messages).tasks,
+        collect_github_messages.si(repo_git), #*create_grouped_task_load(dataList=first_pass,task=collect_github_messages).tasks,
+        collect_github_repo_clones_data.si(repo_git),
     )
 
     repo_task_group = group(
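`primary_repo_collect_phase` builds its workflow from immutable Celery signatures: `.si()` freezes a task's arguments and ignores the parent's return value, `group` fans signatures out in parallel, and `chain` orders the phases, which is why the clone-collection task can slot into `secondary_repo_jobs` with nothing but `repo_git`. A minimal, self-contained sketch of the same composition pattern with stand-in task names (the tasks and broker/backend URLs here are hypothetical, not Augur's):

```python
from celery import Celery, chain, group

app = Celery("sketch", broker="memory://", backend="cache+memory://")

@app.task
def fetch(repo_git):    # stand-in for collect_issues / collect_pull_requests
    return f"fetched {repo_git}"

@app.task
def enrich(repo_git):   # stand-in for the secondary jobs, e.g. clone traffic
    return f"enriched {repo_git}"

repo_git = "https://github.com/chaoss/augur"

# .si() = immutable signature: enrich() never receives fetch()'s result,
# so each phase simply takes repo_git again, mirroring start_tasks.py.
workflow = chain(
    group(fetch.si(repo_git), fetch.si(repo_git)),    # like primary_repo_jobs
    group(enrich.si(repo_git), enrich.si(repo_git)),  # like secondary_repo_jobs
)
# workflow.apply_async() schedules the two phases in order once a worker is up.
```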
diff --git a/frontend/frontend.config.json b/frontend/frontend.config.json
new file mode 100644
index 0000000000..14e329784d
--- /dev/null
+++ b/frontend/frontend.config.json
@@ -0,0 +1,7 @@
+{
+    "Frontend": {
+        "host": "ebay.chaoss.io",
+        "port": 5000,
+        "ssl": false
+    }
+}
\ No newline at end of file
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/frontend/src/store/modules/common/getters.ts b/frontend/src/store/modules/common/getters.ts
index e72463d72a..2e09133c46 100644
--- a/frontend/src/store/modules/common/getters.ts
+++ b/frontend/src/store/modules/common/getters.ts
@@ -77,9 +77,22 @@ export default {
     console.log(items)
     return items
   },
+  repoCountInGroup: (state:any) => (rg_id:number) => {
+    return state.cache.getRepos.filter((repo: any) => repo.repo_group_id === rg_id).length;
+  },
+
   sortedRepoGroups: (state:any) => (col:string, ascending: boolean) => {
     console.log(state.cache)
     const items = [...state.cache.getRepoGroups].sort((a,b) => {
+      if (col == "rg_repos"){
+        const aCount = state.cache.getRepos.filter((repo: any) => repo.repo_group_id === a.repo_group_id).length
+        const bCount = state.cache.getRepos.filter((repo: any) => repo.repo_group_id === b.repo_group_id).length
+        // numeric comparator keeps the sort direction consistent with the other columns
+        return ascending ? aCount - bCount : bCount - aCount
+      }
       if (a[col] > b[col]) {
         return ascending ? 1 : -1
       } else if (a[col] < b[col]) {
diff --git a/frontend/src/views/RepoGroups.vue b/frontend/src/views/RepoGroups.vue
index 2a0a9b97d6..1fa6137ad3 100755
--- a/frontend/src/views/RepoGroups.vue
+++ b/frontend/src/views/RepoGroups.vue
@@ -47,6 +47,16 @@
         >
+        <th>
+          Repos
+        </th>
         <th>
           Website
         </th>
@@ -92,6 +102,7 @@
           {{ group.rg_name }}
           {{ group.rg_description }}
+          {{ repoCountInGroup(group.repo_group_id) }}
           {{ group.rg_website }}
           {{ group.rg_last_modified }}
           {{ group.rg_type }}
@@ -147,6 +158,7 @@
 import Component from "vue-class-component";
 import Vue from "vue";
 import { mapActions, mapGetters, mapMutations } from "vuex";
 import Spinner from "@/components/Spinner.vue";
+
 @Component({
   components: {
     Spinner
@@ -163,7 +175,7 @@
     ...mapActions("compare", ["addComparedGroup", "setBaseGroup"])
   },
   computed: {
-    ...mapGetters("common", ["sortedRepoGroups", "repoGroups"])
+    ...mapGetters("common", ["sortedRepoGroups", "repoGroups", "repoCountInGroup"])
   }
 })
 export default class RepoGroups extends Vue {
@@ -194,6 +206,7 @@ export default class RepoGroups extends Vue {
   getRepoRelations!: any;
   loadRepoGroups!: any;
+  repoCountInGroup!: any;
   repo_groups!: any[];
   sortedRepoGroups!: any[];
   addRepoGroup!: any;