Skip to content

Commit

Permalink
add endpoint /admin/cleanuptasks
Browse files Browse the repository at this point in the history
  • Loading branch information
dchhabda committed Oct 19, 2022
1 parent 9a1f7ad commit 7da4ca5
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 40 deletions.
6 changes: 2 additions & 4 deletions create_bulk_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def create_task(project_id, created, num_answers, priority, year):
"""
columns_values = dict(
created=created,
project_id=project_id, state="complete",
project_id=project_id, state="completed",
info=json.dumps({"company": "bberg", "earning_year": year}),
exported=True, n_answers=num_answers, priority_0=priority,
quorum=0, calibration=0
Expand Down Expand Up @@ -139,9 +139,7 @@ def setup_args():
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--numtasks", dest="num_tasks", type=int, required=True, help="number of tasks to create")
parser.add_argument("-p", "--projectid", dest="project_id", type=int, required=True, help="project id under which tasks to create")
parser.add_argument("--random-year", dest="random_year", action="store_true", help="generate tasks with random year for task created date")
parser.add_argument("--no-random-year", dest="random_year", action="store_false", help="generate tasks with current year for task created date; no random year")
parser.set_defaults(random_year=True) # create tasks with created date having random year number
parser.add_argument("--random-year", dest="random_year", action="store_true", help="create tasks with random year with parameter passed, else current year")
args = parser.parse_args()
return args

Expand Down
9 changes: 2 additions & 7 deletions pybossa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,17 +763,15 @@ def setup_scheduled_jobs(app): # pragma: no cover
"""Setup scheduled jobs."""
from datetime import datetime
from pybossa.jobs import enqueue_periodic_jobs, schedule_job, \
get_quarterly_date, get_saturday_4pm_date
get_quarterly_date
from rq_scheduler import Scheduler
redis_conn = sentinel.master
scheduler = Scheduler(queue_name='scheduled_jobs', connection=redis_conn)
MINUTE = 60
HOUR = 60 * 60
WEEK = 7 * (24 * HOUR)
MONTH = 30 * (24 * HOUR)

first_quaterly_execution = get_quarterly_date(datetime.utcnow())
saturday_4pm = get_saturday_4pm_date(datetime.utcnow())
JOBS = [dict(name=enqueue_periodic_jobs, args=['email'], kwargs={},
interval=(1 * MINUTE), timeout=(10 * MINUTE)),
dict(name=enqueue_periodic_jobs, args=['maintenance'], kwargs={},
Expand All @@ -790,10 +788,7 @@ def setup_scheduled_jobs(app): # pragma: no cover
interval=(1 * MONTH), timeout=(30 * MINUTE)),
dict(name=enqueue_periodic_jobs, args=['quaterly'], kwargs={},
interval=(3 * MONTH), timeout=(30 * MINUTE),
scheduled_time=first_quaterly_execution),
dict(name=enqueue_periodic_jobs, args=['weekly'], kwargs={},
interval=(WEEK), timeout=(30 * MINUTE),
scheduled_time=saturday_4pm)]
scheduled_time=first_quaterly_execution)]

for job in JOBS:
schedule_job(job, scheduler)
Expand Down
32 changes: 16 additions & 16 deletions pybossa/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,10 @@ def get_periodic_jobs(queue):
leaderboard_jobs = get_leaderboard_jobs() if queue == 'super' else []
weekly_update_jobs = get_weekly_stats_update_projects() if queue == 'low' else []
failed_jobs = get_maintenance_jobs() if queue == 'maintenance' else []
completed_tasks_cleanup_job = get_completed_tasks_cleaup_jobs() if queue == "weekly" else []
# completed_tasks_cleanup_job = get_completed_tasks_cleaup_jobs() if queue == 'weekly' else [] # TODO: uncomment in future PR
_all = [jobs, admin_report_jobs, project_jobs, autoimport_jobs,
engage_jobs, non_contrib_jobs, dashboard_jobs,
weekly_update_jobs, failed_jobs, leaderboard_jobs,
completed_tasks_cleanup_job]
weekly_update_jobs, failed_jobs, leaderboard_jobs]
return (job for sublist in _all for job in sublist if job['queue'] == queue)


Expand Down Expand Up @@ -1541,15 +1540,16 @@ def respond_csv():
send_mail(mail_dict)


def get_completed_tasks_cleaup_jobs(queue="weekly"):
"""Return job that will perform cleanup of completed tasks."""
timeout = current_app.config.get('TIMEOUT')
job = dict(name=perform_completed_tasks_cleanup,
args=[],
kwargs={},
timeout=timeout,
queue=queue)
yield job
# TODO: uncomment, reuse this under future PR
# def get_completed_tasks_cleaup_jobs(queue="weekly"):
# """Return job that will perform cleanup of completed tasks."""
# timeout = current_app.config.get('TIMEOUT')
# job = dict(name=perform_completed_tasks_cleanup,
# args=[],
# kwargs={},
# timeout=timeout,
# queue=queue)
# yield job


def perform_completed_tasks_cleanup():
Expand All @@ -1575,8 +1575,8 @@ def perform_completed_tasks_cleanup():
cleanup_days = -1
if cleanup_days not in valid_days:
current_app.logger.info(
f"Skipping project cleanup days due to invalid configuration,"
f"project id {project_id}, completed_tasks_cleanup_days {cleanup_days}, valid days {valid_days}"
f"Skipping project cleanup days due to invalid cleanup days,"
f"project id {project_id}, completed_tasks_cleanup_days {row.cleanup_days}, valid days {valid_days}"
)
else:
projects.append((project_id, cleanup_days))
Expand All @@ -1593,7 +1593,7 @@ def perform_completed_tasks_cleanup():
params = dict(project_id=project_id, state="completed", duration=cleanup_days)
results = db.slave_session.execute(sql, params)
total_tasks = results.rowcount if results else 0
current_app.logger.info(f"Performing completed tasks cleanup for project {project_id}. Total tasks: {total_tasks}")
current_app.logger.info(f"Performing cleanup of {total_tasks} completed tasks for project {project_id} that are older than {cleanup_days} days or more.")
for row in results:
purge_task_data(row.task_id, project_id)
current_app.logger.info(f"Performing completed tasks cleanup for project {project_id}")
current_app.logger.info(f"Finished cleanup of completed tasks for project {project_id}")
14 changes: 13 additions & 1 deletion pybossa/view/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
from pybossa.forms.admin_view_forms import *
from pybossa.importers import BulkImportException
from pybossa.jobs import get_dashboard_jobs, export_all_users, \
load_management_dashboard_data
load_management_dashboard_data, perform_completed_tasks_cleanup
from pybossa.jobs import get_management_dashboard_stats
from pybossa.jobs import send_mail
from pybossa.model import make_timestamp
Expand Down Expand Up @@ -835,3 +835,15 @@ def disable_user(user_id=None):
return redirect(url_for('.manageusers'))
msg = "User not found"
return format_error(msg, 404)


@blueprint.route('/cleanuptasks', methods=['GET', 'POST'])
@login_required
@admin_required
def cleanuptasks():
"""Perform completed tasks cleanup."""
current_app.logger.info(f"User {current_user.name} ({current_user.email_addr}) initiated cleanup of completed tasks")
perform_completed_tasks_cleanup()
markup = Markup(' {} ')
flash(markup.format(gettext("Cleanup of completed tasks complete")))
return redirect(url_for('.index'))
9 changes: 9 additions & 0 deletions test/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,3 +1480,12 @@ def test_announcement_delete_json(self):
assert data['status'] == 'success'
announcements = announcement_repo.get_all_announcements()
assert len(announcements) == 0, announcements

@with_context
@patch('pybossa.view.admin.perform_completed_tasks_cleanup')
def test_admin_cleanuptasks(self, mock_cleanuptasks):
"""Test ADMIN JSON index page works"""
self.register()
self.signin()
res = self.app_get_json("/admin/cleanuptasks")
mock_cleanuptasks.assert_called()
88 changes: 76 additions & 12 deletions test/test_jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,81 @@ def test_completed_tasks_cleanup(self, mock_purge_tasks):
actual_calls_params.append(call[0])
assert (task2_id, project_id) not in actual_calls_params, "Task id 2 should not be purged"

@with_context
def test_saturday_4pm_date(self):
"""Test date generated is saturday 4 pm date from a given date."""
date1 = datetime.strptime("2022-10-12 10:00PM", "%Y-%m-%d %I:%M%p")
saturday = get_saturday_4pm_date(date1)
assert saturday.strftime("%Y-%m-%d %H:%M:%S") == "2022-10-15 16:00:00"
# test with some other date
date2 = datetime.strptime("2026-01-31 10:00AM", "%Y-%m-%d %I:%M%p")
saturday = get_saturday_4pm_date(date2)
assert saturday.strftime("%Y-%m-%d %H:%M:%S") == "2026-01-31 16:00:00"

@with_context
@patch('pybossa.jobs.purge_task_data')
def test_completed_tasks_cleanup_bad_config(self, mock_purge_tasks):
"""Test completed_tasks_cleanup deletes tasks qualify for deletion."""

from flask import current_app
current_app.config['COMPLETED_TASK_CLEANUP_DAYS'] = [(None, None)]
perform_completed_tasks_cleanup()
assert not mock_purge_tasks.called

@with_context
@patch('pybossa.jobs.purge_task_data')
def test_completed_tasks_cleanup_bad_project_config(self, mock_purge_tasks):
"""Test completed_tasks_cleanup deletes tasks qualify for deletion."""

from flask import current_app
current_app.config['COMPLETED_TASK_CLEANUP_DAYS'] = [(30, "30 days"), (60, "60 days")]
ProjectFactory.create(info=dict(completed_tasks_cleanup_days=240))
ProjectFactory.create(info=dict(completed_tasks_cleanup_days="xyz"))
perform_completed_tasks_cleanup()
assert not mock_purge_tasks.called

# TODO: uncomment after tests database can be upgraded similar to pybossa database
# this test performs end to end testing archiving data to tables and cleaning up
# archive tables from test db upon testing complete for future test runs to be successful
# mock_purge_tasks can be removed with task data cleanup and archive happening in actual
# @with_context
# @patch('pybossa.jobs.purge_task_data')
# def test_completed_tasks_cleanup(self, mock_purge_tasks):
# """Test completed_tasks_cleanup deletes tasks qualify for deletion."""

# cleanup_days = 30
# project = ProjectFactory.create(info=dict(completed_tasks_cleanup_days=cleanup_days))

# # task creation dates. generate sample tasks
# # 2 tasks completed are with creation date more than 30 days from current date
# # 1 task completed and is with current creation date
# now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S')
# created = (datetime.utcnow() - timedelta(60)).strftime('%Y-%m-%dT%H:%M:%S')
# past_30days = (datetime.utcnow() - timedelta(30)).strftime('%Y-%m-%dT%H:%M:%S')

# task1 = TaskFactory.create(project=project, created=past_30days, n_answers=2, state="completed")
# task2 = TaskFactory.create(project=project, created=now, n_answers=4, state="completed")
# task3 = TaskFactory.create(project=project, created=past_30days, n_answers=3, state="completed")

# TaskRunFactory.create_batch(2, project=project, created=created, finish_time=now, task=task1)
# TaskRunFactory.create_batch(4, project=project, created=created, finish_time=now, task=task2)
# TaskRunFactory.create_batch(3, project=project, created=created, finish_time=now, task=task3)

# task1_id, task2_id, task3_id = task1.id, task2.id, task3.id
# project_id = project.id
# perform_completed_tasks_cleanup()
# assert mock_purge_tasks.call_count == 2
# # task 1 and task 3 would be cleaned up as they are completed
# # and 30 days old hence qualifying for deletion.
# # task 2 though complete is less than 30 days old, hence not
# # get called for deletion
# expected_calls_params = [(task3_id, project.id), (task1_id, project.id)]
# actual_calls_params = []
# for call in mock_purge_tasks.call_args_list:
# assert call[0] in expected_calls_params
# actual_calls_params.append(call[0])
# assert (task2_id, project_id) not in actual_calls_params, "Task id 2 should not be purged"

# To test actual task cleanup against database, make sure to have
# archived tables created in you local test database.
# Uncommment following code before running tests locally
Expand Down Expand Up @@ -342,15 +417,4 @@ def test_completed_tasks_cleanup(self, mock_purge_tasks):
# db.conn.commit()
# sql = f"DELETE FROM result_archived WHERE task_id IN({task1_id}, {task2_id}, {task3_id});"
# db.execute_sql(sql)
# db.conn.commit()

@with_context
def test_saturday_4pm_date(self):
"""Test date generated is saturday 4 pm date from a given date."""
date1 = datetime.strptime("2022-10-12 10:00PM", "%Y-%m-%d %I:%M%p")
saturday = get_saturday_4pm_date(date1)
assert saturday.strftime("%Y-%m-%d %H:%M:%S") == "2022-10-15 16:00:00"
# test with some other date
date2 = datetime.strptime("2026-01-31 10:00AM", "%Y-%m-%d %I:%M%p")
saturday = get_saturday_4pm_date(date2)
assert saturday.strftime("%Y-%m-%d %H:%M:%S") == "2026-01-31 16:00:00"
# db.conn.commit()

0 comments on commit 7da4ca5

Please sign in to comment.