Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modifications to next hid_counter generation #12948

Merged
merged 4 commits into from Nov 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
37 changes: 28 additions & 9 deletions lib/galaxy/model/__init__.py
Expand Up @@ -68,6 +68,7 @@
type_coerce,
Unicode,
UniqueConstraint,
update,
VARCHAR,
)
from sqlalchemy.exc import OperationalError
Expand Down Expand Up @@ -2440,15 +2441,33 @@ def add_pending_items(self, set_output_hid=True):
self._pending_additions = []

def _next_hid(self, n=1):
# this is overriden in mapping.py db_next_hid() method
if len(self.datasets) == 0:
return n
else:
last_hid = 0
for dataset in self.datasets:
if dataset.hid > last_hid:
last_hid = dataset.hid
return last_hid + n
"""
Generate next_hid from the database in a concurrency safe way:
1. Retrieve hid_counter from database
2. Increment hid_counter by n and store in database
3. Return retrieved hid_counter.

Handle with SQLAlchemy Core to keep this independent from current session state, except:
expire hid_counter attribute, since its value in the session is no longer valid.
"""
session = object_session(self)
engine = session.bind
table = self.__table__
history_id = cached_id(self)
update_stmt = update(table).where(table.c.id == history_id).values(hid_counter=table.c.hid_counter + n)

with engine.begin() as conn:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That creates a new commit, which we don't want. Why is this needed ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

item 2 in the PR description. In my understanding, there's no autocommit in SA2.0. i.e., it would work now using with endine.connect() as conn, but unless we did conn.commit() in the end, it wouldn't work as soon as we moved to SA2.0.

Copy link
Member

@mvdbeek mvdbeek Nov 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you verify we're not expiring all attributes here ? If we are this would be a huge performance penalty. It seems we might not be, since you're explicitly expiring hid_counter ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a test for that very reason 😃 4ea1185

if engine.name in ['postgres', 'postgresql']:
stmt = update_stmt.returning(table.c.hid_counter)
updated_hid = conn.execute(stmt).scalar()
hid = updated_hid - n
else:
select_stmt = select(table.c.hid_counter).where(table.c.id == history_id).with_for_update()
hid = conn.execute(select_stmt).scalar()
conn.execute(update_stmt)

session.expire(self, ['hid_counter'])
return hid

def add_galaxy_session(self, galaxy_session, association=None):
if association is None:
Expand Down
31 changes: 1 addition & 30 deletions lib/galaxy/model/mapping.py
Expand Up @@ -8,10 +8,7 @@
from threading import local
from typing import Optional, Type

from sqlalchemy import (
and_,
select,
)
from sqlalchemy import and_
from sqlalchemy.orm import class_mapper, object_session, relation

from galaxy import model
Expand All @@ -31,32 +28,6 @@
"creating_job_associations", relation(model.JobToOutputDatasetCollectionAssociation, viewonly=True))


# Helper methods.
def db_next_hid(self, n=1):
"""
db_next_hid( self )

Override __next_hid to generate from the database in a concurrency safe way.
Loads the next history ID from the DB and returns it.
It also saves the future next_id into the DB.

:rtype: int
:returns: the next history id
"""
session = object_session(self)
table = self.table
if "postgres" not in session.bind.dialect.name:
next_hid = select([table.c.hid_counter], table.c.id == model.cached_id(self)).with_for_update().scalar()
table.update(table.c.id == self.id).execute(hid_counter=(next_hid + n))
else:
stmt = table.update().where(table.c.id == model.cached_id(self)).values(hid_counter=(table.c.hid_counter + n)).returning(table.c.hid_counter)
next_hid = session.execute(stmt).scalar() - n
return next_hid


model.History._next_hid = db_next_hid # type: ignore


def _workflow_invocation_update(self):
session = object_session(self)
table = self.table
Expand Down
27 changes: 26 additions & 1 deletion test/unit/data/test_galaxy_mapping.py
Expand Up @@ -621,7 +621,7 @@ def get_latest_entry(entries):
self.new_hda(h1, name="1")
self.new_hda(h2, name="2")
self.session().flush()
# db_next_hid modifies history, plus trigger on HDA means 2 additional audit rows per history
# _next_hid modifies history, plus trigger on HDA means 2 additional audit rows per history

h1_audits = get_audit_table_entries(h1)
h2_audits = get_audit_table_entries(h2)
Expand Down Expand Up @@ -920,6 +920,31 @@ def test_can_manage_private_dataset(self):
assert security_agent.can_manage_dataset(u_from.all_roles(), d1.dataset)
assert not security_agent.can_manage_dataset(u_other.all_roles(), d1.dataset)

def test_history_hid_counter_is_expired_after_next_hid_call(self):
u = model.User(email="hid_abuser@example.com", password="password")
h = model.History(name="History for hid testing", user=u)
self.persist(u, h)
state = inspect(h)
assert h.hid_counter == 1
assert 'hid_counter' not in state.unloaded
assert 'id' not in state.unloaded

h._next_hid()

assert 'hid_counter' in state.unloaded # this attribute has been expired
assert 'id' not in state.unloaded # but other attributes have NOT been expired
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, so because we bypass the current session completely this works. That's neat!

Copy link
Member

@mvdbeek mvdbeek Nov 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And if it didn't it would've probably wreaked havoc in the tests, where a lot of HDAs would be committed with a hid, which would raise exceptions now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep! I also turned on echo=True for the engine when I was working on this to make sure there were no surprises.

assert h.hid_counter == 2 # check this last: this causes thie hid_counter to be reloaded

def test_next_hid(self):
u = model.User(email="hid_abuser@example.com", password="password")
h = model.History(name="History for hid testing", user=u)
self.persist(u, h)
assert h.hid_counter == 1
h._next_hid()
assert h.hid_counter == 2
h._next_hid(n=3)
assert h.hid_counter == 5

def _three_users(self, suffix):
email_from = f"user_{suffix}e1@example.com"
email_to = f"user_{suffix}e2@example.com"
Expand Down