Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance updates and misc. #2264

Merged
merged 26 commits into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9259daf
Throttling up collection rate as an experiment.
sgoggins Mar 21, 2023
90ef906
Speed test.
sgoggins Mar 21, 2023
2c23a5c
Merge pull request #2260 from chaoss/dev
sgoggins Mar 22, 2023
d1f4cb1
Throttling back processes.
sgoggins Mar 22, 2023
2cadaad
Merge remote-tracking branch 'origin/spg-speed-test' into spg-speed-test
sgoggins Mar 22, 2023
36e777e
Read the docs back to 3.8
sgoggins Mar 22, 2023
2fa60b4
Getting rid of null strings in commits table.
sgoggins Mar 22, 2023
d06ee05
Fixing.
sgoggins Mar 22, 2023
0cadc6b
Alembic indentation fix.
sgoggins Mar 22, 2023
fed00cb
updating database update message from Alembic.
sgoggins Mar 22, 2023
3c4d52f
Data models
sgoggins Mar 23, 2023
bd6e349
Setting autovacuum for commits table, which gets heavy updates, to en…
sgoggins Mar 24, 2023
897f846
facade settings update
sgoggins Mar 24, 2023
06b2898
update to max concurrent processes
sgoggins Mar 24, 2023
c56b26f
message on schema update more clear
sgoggins Mar 24, 2023
b6bc65c
version update start
sgoggins Mar 24, 2023
81bd048
Adding a check to see if the cmt_ght_author_id is NULL. If it is, we …
sgoggins Mar 28, 2023
8f8e038
still tweaking max performance
sgoggins Mar 28, 2023
0d9660e
dialing back facade load to leave room for others
sgoggins Mar 28, 2023
bdcdac5
rad me
sgoggins Mar 28, 2023
47bbc65
Merge pull request #2268 from chaoss/dev
sgoggins Mar 28, 2023
acc40f4
removed stray file
sgoggins Mar 28, 2023
18d7db7
albembic comment update
sgoggins Mar 28, 2023
4cfd8a8
Merge branch 'spg-speed-test' of https://github.com/chaoss/augur into…
sgoggins Mar 28, 2023
f6952c8
Fixed repo/collection mis-sizing per @isaac
sgoggins Mar 30, 2023
d133bc5
updated per @isaacs detailed instructions
sgoggins Mar 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@

# Augur NEW Release v0.50.0
# Augur NEW Release v0.50.1

[![first-timers-only](https://img.shields.io/badge/first--timers--only-friendly-blue.svg?style=flat-square)](https://www.firsttimersonly.com/) We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy of tagging issues for first timers only, and walking one newcomer through the resolution process weekly. [You can find these issues tagged with "first timers only" on our issues list.](https://github.com/chaoss/augur/labels/first-timers-only).

Expand Down
4 changes: 2 additions & 2 deletions augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ def start(disable_collection, development, port):
os.remove("celerybeat-schedule.db")

scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=14 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=5 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=70 -n core:{uuid.uuid4().hex}@%h"
sgoggins marked this conversation as resolved.
Show resolved Hide resolved
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=10 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"

scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" "))
core_worker_process = subprocess.Popen(core_worker.split(" "))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""No null author affiliation and committer affiliation in commits table

Revision ID: 13
Revises: 12
Create Date: 2023-02-23 10:14:08.787528

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re


# revision identifiers, used by Alembic.
revision = '13'
down_revision = '12'
branch_labels = None
depends_on = None


def upgrade():
    """Set the column default of both affiliation columns on commits to NULL.

    Runs one ALTER TABLE statement against "augur_data"."commits" through the
    connection Alembic is bound to, changing the default of
    "cmt_author_affiliation" and "cmt_committer_affiliation".
    """
    connection = op.get_bind()
    # The statement has no interpolated values, so it is assembled from plain
    # string pieces; both columns are altered in a single ALTER TABLE.
    alter_defaults = text(
        '\nALTER TABLE "augur_data"."commits"\n'
        'ALTER COLUMN "cmt_author_affiliation" SET DEFAULT NULL,\n'
        'ALTER COLUMN "cmt_committer_affiliation" SET DEFAULT NULL;\n'
    )
    connection.execute(alter_defaults)


def downgrade():
    """Do nothing: the column defaults set by upgrade() are left in place."""
    # NOTE(review): the defaults that existed before this revision are not
    # visible here, so no reverse statement is issued.
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Frequent vacuuming on high update commits table

Revision ID: 14
Revises: 13
Create Date: 2023-02-23 10:14:08.787528

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re


# revision identifiers, used by Alembic.
revision = '14'
down_revision = '13'
branch_labels = None
depends_on = None


def upgrade():
    """Apply per-table autovacuum storage parameters to augur_data.commits.

    Sets autovacuum_vacuum_scale_factor to 0 and autovacuum_vacuum_threshold
    to 1000 with a single ALTER TABLE ... SET statement, executed through the
    connection Alembic is bound to.
    """
    connection = op.get_bind()
    # PostgreSQL triggers a vacuum at threshold + scale_factor * row count, so
    # a zero scale factor makes the trigger independent of table size.
    autovacuum_settings = text(
        '\nALTER TABLE augur_data.commits SET '
        '(autovacuum_vacuum_scale_factor = 0, autovacuum_vacuum_threshold = 1000);\n'
    )
    connection.execute(autovacuum_settings)


def downgrade():
    """Do nothing: the autovacuum parameters set by upgrade() stay in place."""
    # NOTE(review): if the table had no per-table autovacuum settings before
    # this revision, ALTER TABLE ... RESET (autovacuum_vacuum_scale_factor,
    # autovacuum_vacuum_threshold) would be the reverse step - confirm before
    # adding it.
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Commit performance update

Revision ID: 15
Revises: 14
Create Date: 2023-02-23 10:14:08.787528

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re


# revision identifiers, used by Alembic.
revision = '15'
down_revision = '14'
branch_labels = None
depends_on = None


def upgrade():
    """Recreate the commits -> contributors foreign key as a deferred constraint.

    In one ALTER TABLE statement on "augur_data"."commits", drops
    "cmt_ght_author_cntrb_id_fk" and adds it back on "cmt_ght_author_id"
    referencing "augur_data"."contributors" ("cntrb_id") with
    ON DELETE RESTRICT, ON UPDATE CASCADE and DEFERRABLE INITIALLY DEFERRED.
    """
    connection = op.get_bind()
    # Drop and re-add happen in the same statement, so the table is never
    # left without the constraint between two separate commands.
    recreate_fk = text(
        '\nALTER TABLE "augur_data"."commits"\n'
        'DROP CONSTRAINT "cmt_ght_author_cntrb_id_fk",\n'
        'ADD CONSTRAINT "cmt_ght_author_cntrb_id_fk" '
        'FOREIGN KEY ("cmt_ght_author_id") '
        'REFERENCES "augur_data"."contributors" ("cntrb_id") '
        'ON DELETE RESTRICT ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED;\n'
    )
    connection.execute(recreate_fk)


def downgrade():
    """Recreate the commits -> contributors foreign key as a plain constraint.

    In one ALTER TABLE statement on "augur_data"."commits", drops
    "cmt_ght_author_cntrb_id_fk" and adds it back on "cmt_ght_author_id"
    referencing "augur_data"."contributors" ("cntrb_id") without any
    ON DELETE / ON UPDATE / DEFERRABLE clauses.
    """
    connection = op.get_bind()
    plain_fk = text(
        '\nALTER TABLE "augur_data"."commits"\n'
        'DROP CONSTRAINT "cmt_ght_author_cntrb_id_fk",\n'
        'ADD CONSTRAINT "cmt_ght_author_cntrb_id_fk" '
        'FOREIGN KEY ("cmt_ght_author_id") '
        'REFERENCES "augur_data"."contributors" ("cntrb_id");\n'
    )
    connection.execute(plain_fk)
5 changes: 3 additions & 2 deletions augur/tasks/github/facade_github/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,9 @@ def link_commits_to_contributor(session,contributorQueue):
UPDATE commits
SET cmt_ght_author_id=:cntrb_id
WHERE
cmt_author_raw_email=:cntrb_email
OR cmt_author_email=:cntrb_email
(cmt_author_raw_email=:cntrb_email
OR cmt_author_email=:cntrb_email)
AND cmt_ght_author_id is NULL
IsaacMilarky marked this conversation as resolved.
Show resolved Hide resolved
""").bindparams(cntrb_id=cntrb["cntrb_id"],cntrb_email=cntrb["email"])

#engine.execute(query, **data)
Expand Down
4 changes: 2 additions & 2 deletions augur/tasks/start_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,14 +365,14 @@ def augur_collection_monitor():
enabled_phase_names = get_enabled_phase_names_from_config(session.logger, session)

if primary_repo_collect_phase.__name__ in enabled_phase_names:
sgoggins marked this conversation as resolved.
Show resolved Hide resolved
start_primary_collection(session, max_repo=50, days=30)
start_primary_collection(session, max_repo=60, days=30)

if secondary_repo_collect_phase.__name__ in enabled_phase_names:
start_secondary_collection(session, max_repo=30, days=30)

if facade_phase.__name__ in enabled_phase_names:
#Schedule facade collection before clone/updates as that is a higher priority
start_facade_collection(session, max_repo=30, days=30)
IsaacMilarky marked this conversation as resolved.
Show resolved Hide resolved
start_facade_collection(session, max_repo=20, days=30)
start_facade_clone_update(session,max_repo=15,days=30)


Expand Down