[22.01] Fix maintenance script bug #15222

Merged
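Summary (inferred from the diff, not stated on the PR itself): both cleanup scripts still reach the mapped tables through the legacy Model.table attribute, and this backport switches every such reference to SQLAlchemy's standard Model.__table__. On a plain declarative class only __table__ is defined, so the old spelling presumably fails with AttributeError when the scripts run. A minimal sketch of the attribute difference, using a hypothetical stand-in model rather than Galaxy's:

# Toy model, not Galaxy code: a declaratively mapped class exposes its Table
# as `__table__`; a bare `.table` attribute only exists if the application
# defines one itself.
import sqlalchemy as sa
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class History(Base):  # hypothetical stand-in for app.model.History
    __tablename__ = "history"
    id = sa.Column(sa.Integer, primary_key=True)
    deleted = sa.Column(sa.Boolean, default=False)


print(History.__table__.c.deleted)   # the mapped Column, usable in queries
print(hasattr(History, "table"))     # False -- History.table.c... would raise AttributeError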
36 changes: 18 additions & 18 deletions scripts/cleanup_datasets/admin_cleanup_datasets.py
@@ -176,15 +176,15 @@ def administrative_delete_datasets(app, cutoff_time, cutoff_days,
     # We really only need the id column here, but sqlalchemy barfs when
     # trying to select only 1 column
     hda_ids_query = sa.select(
-        (app.model.HistoryDatasetAssociation.table.c.id,
-         app.model.HistoryDatasetAssociation.table.c.deleted),
+        (app.model.HistoryDatasetAssociation.__table__.c.id,
+         app.model.HistoryDatasetAssociation.__table__.c.deleted),
         whereclause=and_(
-            app.model.Dataset.table.c.deleted == false(),
-            app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time,
-            app.model.HistoryDatasetAssociation.table.c.deleted == false()),
+            app.model.Dataset.__table__.c.deleted == false(),
+            app.model.HistoryDatasetAssociation.__table__.c.update_time < cutoff_time,
+            app.model.HistoryDatasetAssociation.__table__.c.deleted == false()),
         from_obj=[sa.outerjoin(
-            app.model.Dataset.table,
-            app.model.HistoryDatasetAssociation.table)])
+            app.model.Dataset.__table__,
+            app.model.HistoryDatasetAssociation.__table__)])
 
     # Add all datasets associated with Histories to our list
     hda_ids = []
@@ -206,19 +206,19 @@ def administrative_delete_datasets(app, cutoff_time, cutoff_days,
     # Process each of the Dataset objects
     for hda_id in hda_ids:
         user_query = sa.select(
-            [app.model.HistoryDatasetAssociation.table,
-             app.model.History.table,
-             app.model.User.table],
+            [app.model.HistoryDatasetAssociation.__table__,
+             app.model.History.__table__,
+             app.model.User.__table__],
             whereclause=and_(
-                app.model.HistoryDatasetAssociation.table.c.id == hda_id),
-            from_obj=[sa.join(app.model.User.table,
-                              app.model.History.table)
-                      .join(app.model.HistoryDatasetAssociation.table)],
+                app.model.HistoryDatasetAssociation.__table__.c.id == hda_id),
+            from_obj=[sa.join(app.model.User.__table__,
+                              app.model.History.__table__)
+                      .join(app.model.HistoryDatasetAssociation.__table__)],
             use_labels=True)
         for result in app.sa_session.execute(user_query):
-            user_notifications[result[app.model.User.table.c.email]].append(
-                (result[app.model.HistoryDatasetAssociation.table.c.name],
-                 result[app.model.History.table.c.name]))
+            user_notifications[result[app.model.User.__table__.c.email]].append(
+                (result[app.model.HistoryDatasetAssociation.__table__.c.name],
+                 result[app.model.History.__table__.c.name]))
         deleted_instance_count += 1
         if not info_only and not email_only:
             # Get the HistoryDatasetAssociation objects
@@ -263,7 +263,7 @@ def _get_tool_id_for_hda(app, hda_id):
         return None
     job = app.sa_session.query(app.model.Job).\
         join(app.model.JobToOutputDatasetAssociation).\
-        filter(app.model.JobToOutputDatasetAssociation.table.c.dataset_id
+        filter(app.model.JobToOutputDatasetAssociation.__table__.c.dataset_id
                == hda_id).first()
     if job is not None:
         return job.tool_id
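For reference, a standalone sketch of the id-selection pattern from administrative_delete_datasets() above, written against throwaway stand-in tables and the newer select() calling style rather than the script's legacy whereclause=/from_obj= keywords; only the __table__ column access mirrors the actual change:

# Hedged sketch: toy Dataset / HistoryDatasetAssociation tables standing in
# for Galaxy's models, showing columns reached via __table__ inside a SELECT
# with an outer join.
import datetime

import sqlalchemy as sa
from sqlalchemy import false
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Dataset(Base):
    __tablename__ = "dataset"
    id = sa.Column(sa.Integer, primary_key=True)
    deleted = sa.Column(sa.Boolean, default=False)


class HistoryDatasetAssociation(Base):
    __tablename__ = "history_dataset_association"
    id = sa.Column(sa.Integer, primary_key=True)
    dataset_id = sa.Column(sa.Integer, sa.ForeignKey("dataset.id"))
    deleted = sa.Column(sa.Boolean, default=False)
    update_time = sa.Column(sa.DateTime)


cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(days=60)

hda_ids_query = (
    sa.select(HistoryDatasetAssociation.__table__.c.id,
              HistoryDatasetAssociation.__table__.c.deleted)
    .select_from(sa.outerjoin(Dataset.__table__,
                              HistoryDatasetAssociation.__table__))
    .where(sa.and_(Dataset.__table__.c.deleted == false(),
                   HistoryDatasetAssociation.__table__.c.update_time < cutoff_time,
                   HistoryDatasetAssociation.__table__.c.deleted == false()))
)
print(hda_ids_query)  # compiles to SELECT ... FROM dataset LEFT OUTER JOIN ...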
94 changes: 47 additions & 47 deletions scripts/cleanup_datasets/cleanup_datasets.py
@@ -140,12 +140,12 @@ def delete_userless_histories(app, cutoff_time, info_only=False, force_retry=Fal
     start = time.time()
     if force_retry:
         histories = app.sa_session.query(app.model.History) \
-            .filter(and_(app.model.History.table.c.user_id == null(),
+            .filter(and_(app.model.History.__table__.c.user_id == null(),
                          app.model.History.update_time < cutoff_time))
     else:
         histories = app.sa_session.query(app.model.History) \
-            .filter(and_(app.model.History.table.c.user_id == null(),
-                         app.model.History.table.c.deleted == false(),
+            .filter(and_(app.model.History.__table__.c.user_id == null(),
+                         app.model.History.__table__.c.deleted == false(),
                          app.model.History.update_time < cutoff_time))
     for history in histories:
         if not info_only:
@@ -170,13 +170,13 @@ def purge_histories(app, cutoff_time, remove_from_disk, info_only=False, force_r
     start = time.time()
     if force_retry:
         histories = app.sa_session.query(app.model.History) \
-            .filter(and_(app.model.History.table.c.deleted == true(),
+            .filter(and_(app.model.History.__table__.c.deleted == true(),
                          app.model.History.update_time < cutoff_time)) \
             .options(eagerload('datasets'))
     else:
         histories = app.sa_session.query(app.model.History) \
-            .filter(and_(app.model.History.table.c.deleted == true(),
-                         app.model.History.table.c.purged == false(),
+            .filter(and_(app.model.History.__table__.c.deleted == true(),
+                         app.model.History.__table__.c.purged == false(),
                          app.model.History.update_time < cutoff_time)) \
             .options(eagerload('datasets'))
     for history in histories:
@@ -212,13 +212,13 @@ def purge_libraries(app, cutoff_time, remove_from_disk, info_only=False, force_r
     start = time.time()
     if force_retry:
         libraries = app.sa_session.query(app.model.Library) \
-            .filter(and_(app.model.Library.table.c.deleted == true(),
-                         app.model.Library.table.c.update_time < cutoff_time))
+            .filter(and_(app.model.Library.__table__.c.deleted == true(),
+                         app.model.Library.__table__.c.update_time < cutoff_time))
     else:
         libraries = app.sa_session.query(app.model.Library) \
-            .filter(and_(app.model.Library.table.c.deleted == true(),
-                         app.model.Library.table.c.purged == false(),
-                         app.model.Library.table.c.update_time < cutoff_time))
+            .filter(and_(app.model.Library.__table__.c.deleted == true(),
+                         app.model.Library.__table__.c.purged == false(),
+                         app.model.Library.__table__.c.update_time < cutoff_time))
     for library in libraries:
         _purge_folder(library.root_folder, app, remove_from_disk, info_only=info_only)
         if not info_only:
@@ -243,13 +243,13 @@ def purge_folders(app, cutoff_time, remove_from_disk, info_only=False, force_ret
     start = time.time()
     if force_retry:
         folders = app.sa_session.query(app.model.LibraryFolder) \
-            .filter(and_(app.model.LibraryFolder.table.c.deleted == true(),
-                         app.model.LibraryFolder.table.c.update_time < cutoff_time))
+            .filter(and_(app.model.LibraryFolder.__table__.c.deleted == true(),
+                         app.model.LibraryFolder.__table__.c.update_time < cutoff_time))
     else:
         folders = app.sa_session.query(app.model.LibraryFolder) \
-            .filter(and_(app.model.LibraryFolder.table.c.deleted == true(),
-                         app.model.LibraryFolder.table.c.purged == false(),
-                         app.model.LibraryFolder.table.c.update_time < cutoff_time))
+            .filter(and_(app.model.LibraryFolder.__table__.c.deleted == true(),
+                         app.model.LibraryFolder.__table__.c.purged == false(),
+                         app.model.LibraryFolder.__table__.c.update_time < cutoff_time))
     for folder in folders:
         _purge_folder(folder, app, remove_from_disk, info_only=info_only)
         folder_count += 1
@@ -263,30 +263,30 @@ def delete_datasets(app, cutoff_time, remove_from_disk, info_only=False, force_r
     # Marks datasets as deleted if associated items are all deleted.
     start = time.time()
     if force_retry:
-        history_dataset_ids_query = sa.select((app.model.Dataset.table.c.id,
-                                               app.model.Dataset.table.c.state),
-                                              whereclause=app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time,
-                                              from_obj=[sa.outerjoin(app.model.Dataset.table,
-                                                                     app.model.HistoryDatasetAssociation.table)])
-        library_dataset_ids_query = sa.select((app.model.LibraryDataset.table.c.id,
-                                               app.model.LibraryDataset.table.c.deleted),
-                                              whereclause=app.model.LibraryDataset.table.c.update_time < cutoff_time,
-                                              from_obj=[app.model.LibraryDataset.table])
+        history_dataset_ids_query = sa.select((app.model.Dataset.__table__.c.id,
+                                               app.model.Dataset.__table__.c.state),
+                                              whereclause=app.model.HistoryDatasetAssociation.__table__.c.update_time < cutoff_time,
+                                              from_obj=[sa.outerjoin(app.model.Dataset.__table__,
+                                                                     app.model.HistoryDatasetAssociation.__table__)])
+        library_dataset_ids_query = sa.select((app.model.LibraryDataset.__table__.c.id,
+                                               app.model.LibraryDataset.__table__.c.deleted),
+                                              whereclause=app.model.LibraryDataset.__table__.c.update_time < cutoff_time,
+                                              from_obj=[app.model.LibraryDataset.__table__])
     else:
         # We really only need the id column here, but sqlalchemy barfs when trying to select only 1 column
-        history_dataset_ids_query = sa.select((app.model.Dataset.table.c.id,
-                                               app.model.Dataset.table.c.state),
-                                              whereclause=and_(app.model.Dataset.table.c.deleted == false(),
-                                                               app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time,
-                                                               app.model.HistoryDatasetAssociation.table.c.deleted == true()),
-                                              from_obj=[sa.outerjoin(app.model.Dataset.table,
-                                                                     app.model.HistoryDatasetAssociation.table)])
-        library_dataset_ids_query = sa.select((app.model.LibraryDataset.table.c.id,
-                                               app.model.LibraryDataset.table.c.deleted),
-                                              whereclause=and_(app.model.LibraryDataset.table.c.deleted == true(),
-                                                               app.model.LibraryDataset.table.c.purged == false(),
-                                                               app.model.LibraryDataset.table.c.update_time < cutoff_time),
-                                              from_obj=[app.model.LibraryDataset.table])
+        history_dataset_ids_query = sa.select((app.model.Dataset.__table__.c.id,
+                                               app.model.Dataset.__table__.c.state),
+                                              whereclause=and_(app.model.Dataset.__table__.c.deleted == false(),
+                                                               app.model.HistoryDatasetAssociation.__table__.c.update_time < cutoff_time,
+                                                               app.model.HistoryDatasetAssociation.__table__.c.deleted == true()),
+                                              from_obj=[sa.outerjoin(app.model.Dataset.__table__,
+                                                                     app.model.HistoryDatasetAssociation.__table__)])
+        library_dataset_ids_query = sa.select((app.model.LibraryDataset.__table__.c.id,
+                                               app.model.LibraryDataset.__table__.c.deleted),
+                                              whereclause=and_(app.model.LibraryDataset.__table__.c.deleted == true(),
+                                                               app.model.LibraryDataset.__table__.c.purged == false(),
+                                                               app.model.LibraryDataset.__table__.c.update_time < cutoff_time),
+                                              from_obj=[app.model.LibraryDataset.__table__])
     deleted_dataset_count = 0
     deleted_instance_count = 0
     skip = []
@@ -353,15 +353,15 @@ def purge_datasets(app, cutoff_time, remove_from_disk, info_only=False, force_re
     start = time.time()
     if force_retry:
         datasets = app.sa_session.query(app.model.Dataset) \
-            .filter(and_(app.model.Dataset.table.c.deleted == true(),
-                         app.model.Dataset.table.c.purgable == true(),
-                         app.model.Dataset.table.c.update_time < cutoff_time))
+            .filter(and_(app.model.Dataset.__table__.c.deleted == true(),
+                         app.model.Dataset.__table__.c.purgable == true(),
+                         app.model.Dataset.__table__.c.update_time < cutoff_time))
     else:
         datasets = app.sa_session.query(app.model.Dataset) \
-            .filter(and_(app.model.Dataset.table.c.deleted == true(),
-                         app.model.Dataset.table.c.purgable == true(),
-                         app.model.Dataset.table.c.purged == false(),
-                         app.model.Dataset.table.c.update_time < cutoff_time))
+            .filter(and_(app.model.Dataset.__table__.c.deleted == true(),
+                         app.model.Dataset.__table__.c.purgable == true(),
+                         app.model.Dataset.__table__.c.purged == false(),
+                         app.model.Dataset.__table__.c.update_time < cutoff_time))
     for dataset in datasets:
         file_size = dataset.file_size
         _purge_dataset(app, dataset, remove_from_disk, info_only=info_only)
@@ -418,11 +418,11 @@ def _delete_dataset(dataset, app, remove_from_disk, info_only=False, is_deletabl
     # lets create a list of metadata files, then perform actions on them
     for hda in dataset.history_associations:
         for metadata_file in app.sa_session.query(app.model.MetadataFile) \
-                .filter(app.model.MetadataFile.table.c.hda_id == hda.id):
+                .filter(app.model.MetadataFile.__table__.c.hda_id == hda.id):
             metadata_files.append(metadata_file)
     for ldda in dataset.library_associations:
         for metadata_file in app.sa_session.query(app.model.MetadataFile) \
-                .filter(app.model.MetadataFile.table.c.lda_id == ldda.id):
+                .filter(app.model.MetadataFile.__table__.c.lda_id == ldda.id):
             metadata_files.append(metadata_file)
     for metadata_file in metadata_files:
         op_description = "marked as deleted"
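Finally, the ORM-side shape used throughout cleanup_datasets.py, sketched with a throwaway model and an in-memory database rather than Galaxy's app.model, to show that filtering a session.query() on columns reached via Model.__table__.c is expected to behave the same way the old .table spelling did:

# Hedged end-to-end sketch of the filter pattern after this change.
# `History` here is a stand-in, not Galaxy's real model.
import datetime

import sqlalchemy as sa
from sqlalchemy import and_, false, true
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class History(Base):
    __tablename__ = "history"
    id = sa.Column(sa.Integer, primary_key=True)
    deleted = sa.Column(sa.Boolean, default=False)
    purged = sa.Column(sa.Boolean, default=False)
    update_time = sa.Column(sa.DateTime, default=datetime.datetime.utcnow)


engine = sa.create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(History(deleted=True, purged=False,
                        update_time=datetime.datetime(2020, 1, 1)))
    session.commit()

    cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(days=60)
    # Same shape as purge_histories() above: filter on columns reached
    # through __table__ rather than the removed .table attribute.
    histories = session.query(History) \
        .filter(and_(History.__table__.c.deleted == true(),
                     History.__table__.c.purged == false(),
                     History.update_time < cutoff_time))
    for history in histories:
        print(history.id)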