Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix search on private worksheets #4432

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions codalab/model/bundle_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,19 +633,26 @@ def add_join(table: Table, condition: Any, left_outer_join: bool = False):

if user_id != self.root_user_id:
# Restrict to the bundles that we have access to.
aliased_group_bundle_permission = aliased(cl_group_bundle_permission)
add_join(
cl_group_bundle_permission,
cl_bundle.c.uuid == cl_group_bundle_permission.c.object_uuid,
aliased_group_bundle_permission,
cl_bundle.c.uuid == aliased_group_bundle_permission.c.object_uuid,
left_outer_join=True,
)
aliased_cl_user_group = aliased(cl_user_group)
add_join(
aliased_cl_user_group,
aliased_cl_user_group.c.user_id == user_id,
left_outer_join=True,
Comment on lines +645 to +655
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use aliased here to account for the case when we've already done a join on the group_bundle_permission table or the user_group table. (If you do that twice without aliasing one of them, the query is invalid according to MySQL syntax rules.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does aliased automatically pick a unique name, or do you need to explicitly pick a name?

In a different PR, we should probably add validation when we do the joins to make sure that we're not joining onto the same table twice.

)
add_join(cl_user_group, cl_user_group.c.user_id == user_id, left_outer_join=True)
access_via_owner = cl_bundle.c.owner_id == user_id
access_via_group = and_(
or_( # Join constraint (group)
cl_group_bundle_permission.c.group_uuid
aliased_group_bundle_permission.c.group_uuid
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, is there a potential false positive case here (access_via_group is incorrectly true) if aliased_group_bundle_permission.c.group_uuid and self.public_group_uuid are both None?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking about this a bit more, this logic seems incorrect?

  • aliased_group_bundle_permission is all the group bundle permissions for the object
  • aliased_cl_user_group is all the group memberships for the user
  • aliased_cl_user_group.c.user_id == user_id doesn't do anything because this is already enforced by the join.
  • So it seems like if the bundle grants access to some group A, and the user X belongs to some group B, then user X incorrectly gets access to the bundle?
  • Seems like what we're missing here is we need to check that aliased_group_bundle_permission.c.group_id == aliased_cl_user_group.c.group_id?
  • Alternatively you can push this constraint into the join condition i.e. when you in onto aliased_group_bundle_permission, join using the condition that the bundle permission group is the user's groups or the public group only?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you're totally correct. Thanks for catching this! I updated it. I believe it should be correct now.

== self.public_group_uuid, # Public group
cl_user_group.c.user_id == user_id,
aliased_cl_user_group.c.user_id == user_id,
),
cl_group_bundle_permission.c.permission
aliased_group_bundle_permission.c.permission
>= GROUP_OBJECT_PERMISSION_READ, # Match the uuid of the parent
)

Expand Down
127 changes: 68 additions & 59 deletions tests/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,50 +1839,11 @@ def test_perm(ctx):

@TestModule.register('search')
def test_search(ctx):
name = random_name()
uuid1 = _run_command([cl, 'upload', test_path('a.txt'), '-n', name])
uuid2 = _run_command([cl, 'upload', test_path('b.txt'), '-n', name])
check_equals(uuid1, _run_command([cl, 'search', uuid1, '-u']))
check_equals(
uuid1[:8], _run_command([cl, 'search', 'uuid=' + uuid1, '-f', 'uuid']).split("\n")[2]
)
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1, '-u']))
check_equals('', _run_command([cl, 'search', 'uuid=' + uuid1[0:8], '-u']))
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1[0:8] + '.*', '-u']))
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1[0:8] + '%', '-u']))
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1, 'name=' + name, '-u']))
check_equals(
uuid1 + '\n' + uuid2, _run_command([cl, 'search', 'name=' + name, 'id=.sort', '-u'])
)
check_equals(
uuid1 + '\n' + uuid2,
_run_command([cl, 'search', 'uuid=' + uuid1 + ',' + uuid2, 'id=.sort', '-u']),
)
check_equals(
uuid2 + '\n' + uuid1, _run_command([cl, 'search', 'name=' + name, 'id=.sort-', '-u'])
)
check_equals('2', _run_command([cl, 'search', 'name=' + name, '.count']))
size1 = float(_run_command([cl, 'info', '-f', 'data_size', uuid1]))
size2 = float(_run_command([cl, 'info', '-f', 'data_size', uuid2]))
check_equals(
size1 + size2, float(_run_command([cl, 'search', 'name=' + name, 'data_size=.sum']))
)
# Check floating
check_equals('', _run_command([cl, 'search', '.floating', '-u']))
uuid3 = _run_command([cl, 'upload', test_path('a.txt')])
_run_command([cl, 'detach', uuid3], 0)
check_equals(uuid3, _run_command([cl, 'search', '.floating', '-u']))
_run_command([cl, 'rm', uuid3]) # need to remove since not on main worksheet
# Check search when groups empty
check_equals('', _run_command([cl, 'search', '.shared']))
# Check search with non-root user.
if not os.getenv('CODALAB_PROTECTED_MODE'):
# This test does not work when protected_mode is True.
_, current_user_name = current_user()
user_name = 'non_root_user_' + random_name()
create_user(ctx, user_name, disk_quota='2000')
switch_user(user_name)
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1, '-u']))
def test_search_helper(ctx):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

define a new helper function (so we can run the same tests multiple times)

"""Basic Search and info"""
name = random_name()
uuid1 = _run_command([cl, 'upload', test_path('a.txt'), '-n', name])
uuid2 = _run_command([cl, 'upload', test_path('b.txt'), '-n', name])
check_equals(uuid1, _run_command([cl, 'search', uuid1, '-u']))
check_equals(
uuid1[:8], _run_command([cl, 'search', 'uuid=' + uuid1, '-f', 'uuid']).split("\n")[2]
Expand All @@ -1892,22 +1853,70 @@ def test_search(ctx):
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1[0:8] + '.*', '-u']))
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1[0:8] + '%', '-u']))
check_equals(uuid1, _run_command([cl, 'search', 'uuid=' + uuid1, 'name=' + name, '-u']))
check_equals(
uuid1 + '\n' + uuid2, _run_command([cl, 'search', 'name=' + name, 'id=.sort', '-u'])
)
check_equals(
uuid1 + '\n' + uuid2,
_run_command([cl, 'search', 'uuid=' + uuid1 + ',' + uuid2, 'id=.sort', '-u']),
)
check_equals(
uuid2 + '\n' + uuid1, _run_command([cl, 'search', 'name=' + name, 'id=.sort-', '-u'])
)
check_equals('2', _run_command([cl, 'search', 'name=' + name, '.count']))
size1 = float(_run_command([cl, 'info', '-f', 'data_size', uuid1]))
size2 = float(_run_command([cl, 'info', '-f', 'data_size', uuid2]))
check_equals(
size1 + size2, float(_run_command([cl, 'search', 'name=' + name, 'data_size=.sum']))
)

"""Search for floating bundles"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think we use comment style # rather than docstrings for mid-method comments?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aliased does automatically pick a unique name, so there's no issue with table name collisions or joining on the same table twice That's actually what I meant when I said the query was invalid otherwise; before I added aliased, sometimes it would join on the same table twice and MySQL actually throws an error when that happens. So we should be OK (if I'm understanding your comment correctly).

For the # comments style, I updated that.

check_equals('', _run_command([cl, 'search', '.floating', '-u']))
uuid3 = _run_command([cl, 'upload', test_path('a.txt')])
_run_command([cl, 'detach', uuid3], 0)
check_equals(uuid3, _run_command([cl, 'search', '.floating', '-u']))
_run_command([cl, 'rm', uuid3]) # need to remove since not on main worksheet

"""Search bundles on private worksheets"""
wuuid = _run_command([cl, 'work', '-u'])
new_wuuid = _run_command([cl, 'new', random_name()])
_run_command([cl, 'wperm', new_wuuid, 'public', 'n']) # make worksheet private
_run_command([cl, 'work', new_wuuid]) # switch to worksheet
uuid = _run_command([cl, 'upload', test_path('a.txt')])
check_equals(uuid, _run_command([cl, 'search', uuid, '-u']))
_run_command([cl, 'work', wuuid])
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the new test added for search on private worksheets


"""Search with groups"""
# Empty group
check_equals('', _run_command([cl, 'search', '.shared']))
group_bname = random_name()
group_buuid = _run_command([cl, 'upload', test_path('a.txt'), '-n', group_bname])
ctx.collect_bundle(group_buuid)
user_id, user_name = current_user()
# Create new group
group_name = random_name()
group_uuid = create_group(ctx, group_name)
# Make bundle unavailable to public but available to the group
_run_command([cl, 'perm', group_buuid, 'public', 'n'])
_run_command([cl, 'perm', group_buuid, group_name, 'r'])
check_contains(group_buuid[:8], _run_command([cl, 'search', '.shared']))
check_contains(group_buuid[:8], _run_command([cl, 'search', 'group={}'.format(group_uuid)]))
check_contains(group_buuid[:8], _run_command([cl, 'search', 'group={}'.format(group_name)]))

# Test with root user.
test_search_helper(ctx)

# Test with non-root user.
if not os.getenv('CODALAB_PROTECTED_MODE'):
# This test does not work when protected_mode is True.
_, current_user_name = current_user()
user_name = 'non_root_user_' + random_name()
create_user(ctx, user_name, disk_quota='1000000')
switch_user(user_name)
new_wuuid = _run_command([cl, 'new', random_name()])
_run_command([cl, 'work', new_wuuid])
test_search_helper(ctx)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Call helper function here to actually run the tests. Note we call the same helper for the root and non-root user, so the exact same tests are performed in both cases

switch_user(current_user_name)
# Check search by group
group_bname = random_name()
group_buuid = _run_command([cl, 'run', 'echo hello', '-n', group_bname])
wait(group_buuid)
ctx.collect_bundle(group_buuid)
user_id, user_name = current_user()
# Create new group
group_name = random_name()
group_uuid = create_group(ctx, group_name)
# Make bundle unavailable to public but available to the group
_run_command([cl, 'perm', group_buuid, 'public', 'n'])
_run_command([cl, 'perm', group_buuid, group_name, 'r'])
check_contains(group_buuid[:8], _run_command([cl, 'search', '.shared']))
check_contains(group_buuid[:8], _run_command([cl, 'search', 'group={}'.format(group_uuid)]))
check_contains(group_buuid[:8], _run_command([cl, 'search', 'group={}'.format(group_name)]))


@TestModule.register('search_time')
Expand Down