Skip to content

Commit

Permalink
feat: failing scheduled jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
ankush committed Apr 22, 2024
1 parent 9154e42 commit c712780
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 8 deletions.
24 changes: 16 additions & 8 deletions frappe/desk/doctype/system_health_report/system_health_report.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
"field_order": [
"background_jobs_tab",
"background_jobs_section",
"scheduler_status",
"column_break_klex",
"total_background_workers",
"column_break_mney",
"column_break_klex",
"background_jobs_check",
"test_job_id",
"section_break_djoz",
"queue_status",
"column_break_wjoz",
"background_workers",
"scheduler_section",
"scheduler_status",
"failing_scheduled_jobs",
"database_section",
"database",
"database_version",
Expand Down Expand Up @@ -348,10 +349,6 @@
"fieldtype": "Section Break",
"label": "Realtime (SocketIO)"
},
{
"fieldname": "column_break_mney",
"fieldtype": "Column Break"
},
{
"documentation_url": "/app/rq-job",
"fieldname": "background_jobs_check",
Expand All @@ -367,14 +364,25 @@
{
"fieldname": "column_break_fzke",
"fieldtype": "Column Break"
},
{
"fieldname": "scheduler_section",
"fieldtype": "Section Break",
"label": "Scheduler"
},
{
"fieldname": "failing_scheduled_jobs",
"fieldtype": "Table",
"label": "Failing Scheduled Jobs (last 7 days)",
"options": "System Health Report Failing Jobs"
}
],
"hide_toolbar": 1,
"index_web_pages_for_search": 1,
"is_virtual": 1,
"issingle": 1,
"links": [],
"modified": "2024-04-19 17:12:37.889872",
"modified": "2024-04-22 11:47:52.194784",
"modified_by": "Administrator",
"module": "Desk",
"name": "System Health Report",
Expand Down
31 changes: 31 additions & 0 deletions frappe/desk/doctype/system_health_report/system_health_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class SystemHealthReport(Document):
from frappe.desk.doctype.system_health_report_errors.system_health_report_errors import (
SystemHealthReportErrors,
)
from frappe.desk.doctype.system_health_report_failing_jobs.system_health_report_failing_jobs import (
SystemHealthReportFailingJobs,
)
from frappe.desk.doctype.system_health_report_queue.system_health_report_queue import (
SystemHealthReportQueue,
)
Expand All @@ -82,6 +85,7 @@ class SystemHealthReport(Document):
db_storage_usage: DF.Float
failed_emails: DF.Int
failed_logins: DF.Int
failing_scheduled_jobs: DF.Table[SystemHealthReportFailingJobs]
handled_emails: DF.Int
last_10_active_users: DF.Code | None
new_users: DF.Int
Expand Down Expand Up @@ -115,6 +119,7 @@ def load_from_db(self):
# This is best done by initializing fields with values that indicate that we haven't yet
# fetched the values.
self.fetch_background_jobs()
self.fetch_scheduler()
self.fetch_email_stats()
self.fetch_errors()
self.fetch_database_details()
Expand Down Expand Up @@ -155,6 +160,32 @@ def fetch_background_jobs(self):
},
)

@health_check("Scheduler")
def fetch_scheduler(self):
    """Populate the scheduler status and the most frequently failing scheduled jobs.

    Sets ``self.scheduler_status`` from ``get_scheduler_status()`` and appends up
    to five rows to the ``failing_scheduled_jobs`` child table. Each row holds a
    ``scheduled_job_type`` and its ``failure_rate`` (percentage of log entries
    whose status is not ``'Complete'``), ordered from highest to lowest rate.
    Only job types with a non-zero failure rate are included.
    """
    # Start of the reporting window (7 days back; presumably relative to now,
    # since no base date is passed to add_to_date).
    lower_threshold = add_to_date(None, days=-7, as_datetime=True)
    # Exclude "maybe" currently executing job
    upper_threshold = add_to_date(None, minutes=-30, as_datetime=True)
    self.scheduler_status = get_scheduler_status().get("status")
    # NOTE: HAVING repeats the aggregate expression instead of referencing the
    # `failure_rate` alias. Using a select alias in HAVING is a MySQL/MariaDB
    # extension and is rejected by PostgreSQL; the explicit expression works on
    # both. ORDER BY may keep using the alias on either engine.
    failing_jobs = frappe.db.sql(
        """
select scheduled_job_type,
avg(CASE WHEN status != 'Complete' THEN 1 ELSE 0 END) * 100 as failure_rate
from `tabScheduled Job Log`
where
creation > %(lower_threshold)s
and modified > %(lower_threshold)s
and creation < %(upper_threshold)s
group by scheduled_job_type
having avg(CASE WHEN status != 'Complete' THEN 1 ELSE 0 END) * 100 > 0
order by failure_rate desc
limit 5""",
        {"lower_threshold": lower_threshold, "upper_threshold": upper_threshold},
        as_dict=True,
    )

    # Each result dict maps directly onto a child-table row.
    for job in failing_jobs:
        self.append("failing_scheduled_jobs", job)

@health_check("Emails")
def fetch_email_stats(self):
threshold = add_to_date(None, days=-7, as_datetime=True)
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"actions": [],
"allow_rename": 1,
"creation": "2024-04-22 11:45:32.923379",
"doctype": "DocType",
"editable_grid": 1,
"engine": "InnoDB",
"field_order": [
"scheduled_job_type",
"failure_rate"
],
"fields": [
{
"fieldname": "scheduled_job_type",
"fieldtype": "Link",
"in_list_view": 1,
"label": "Scheduled Job Type",
"options": "Scheduled Job Type"
},
{
"fieldname": "failure_rate",
"fieldtype": "Percent",
"in_list_view": 1,
"label": "Failure Rate"
}
],
"index_web_pages_for_search": 1,
"is_virtual": 1,
"istable": 1,
"links": [],
"modified": "2024-04-22 11:46:53.574720",
"modified_by": "Administrator",
"module": "Desk",
"name": "System Health Report Failing Jobs",
"owner": "Administrator",
"permissions": [],
"sort_field": "creation",
"sort_order": "DESC",
"states": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) 2024, Frappe Technologies and contributors
# For license information, please see license.txt

# import frappe
from frappe.model.document import Document


class SystemHealthReportFailingJobs(Document):
    """Row type for the "Failing Scheduled Jobs" table of the System Health Report.

    Rows carry a scheduled job type and its failure rate. Every persistence
    hook below raises ``NotImplementedError``, and the list/count/stats hooks
    return nothing.
    """

    # begin: auto-generated types
    # This code is auto-generated. Do not modify anything in this block.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from frappe.types import DF

        failure_rate: DF.Percent
        parent: DF.Data
        parentfield: DF.Data
        parenttype: DF.Data
        scheduled_job_type: DF.Link | None
    # end: auto-generated types

    def db_insert(self, *args, **kwargs):
        """Inserting is not supported for this row type."""
        raise NotImplementedError

    def load_from_db(self):
        """Loading from the database is not supported for this row type."""
        raise NotImplementedError

    def db_update(self):
        """Updating is not supported for this row type."""
        raise NotImplementedError

    def delete(self):
        """Deleting is not supported for this row type."""
        raise NotImplementedError

    @staticmethod
    def get_list(filters=None, page_length=20, **kwargs):
        """List hook; intentionally a no-op that returns ``None``."""
        return None

    @staticmethod
    def get_count(filters=None, **kwargs):
        """Count hook; intentionally a no-op that returns ``None``."""
        return None

    @staticmethod
    def get_stats(**kwargs):
        """Stats hook; intentionally a no-op that returns ``None``."""
        return None

0 comments on commit c712780

Please sign in to comment.