Add option of sending DAG parser logs to stdout. #25754

Merged · 4 commits · Sep 7, 2022
Changes from all commits
15 changes: 14 additions & 1 deletion airflow/config_templates/airflow_local_settings.py
@@ -37,6 +37,7 @@
 FAB_LOG_LEVEL: str = conf.get_mandatory_value('logging', 'FAB_LOGGING_LEVEL').upper()
 
 LOG_FORMAT: str = conf.get_mandatory_value('logging', 'LOG_FORMAT')
+DAG_PROCESSOR_LOG_FORMAT: str = conf.get_mandatory_value('logging', 'DAG_PROCESSOR_LOG_FORMAT')
 
 LOG_FORMATTER_CLASS: str = conf.get_mandatory_value(
     'logging', 'LOG_FORMATTER_CLASS', fallback='airflow.utils.log.timezone_aware.TimezoneAware'
@@ -48,6 +49,8 @@
 
 COLORED_FORMATTER_CLASS: str = conf.get_mandatory_value('logging', 'COLORED_FORMATTER_CLASS')
 
+DAG_PROCESSOR_LOG_TARGET: str = conf.get_mandatory_value('logging', 'DAG_PROCESSOR_LOG_TARGET')
+
 BASE_LOG_FOLDER: str = conf.get_mandatory_value('logging', 'BASE_LOG_FOLDER')
 
 PROCESSOR_LOG_FOLDER: str = conf.get_mandatory_value('scheduler', 'CHILD_PROCESS_LOG_DIRECTORY')
@@ -75,6 +78,10 @@
             'format': COLORED_LOG_FORMAT if COLORED_LOG else LOG_FORMAT,
             'class': COLORED_FORMATTER_CLASS if COLORED_LOG else LOG_FORMATTER_CLASS,
         },
+        'source_processor': {
+            'format': DAG_PROCESSOR_LOG_FORMAT,
+            'class': LOG_FORMATTER_CLASS,
+        },
     },
     'filters': {
         'mask_secrets': {
@@ -101,10 +108,16 @@
             'filename_template': PROCESSOR_FILENAME_TEMPLATE,
             'filters': ['mask_secrets'],
         },
+        'processor_to_stdout': {
+            'class': 'airflow.utils.log.logging_mixin.RedirectStdHandler',
+            'formatter': 'source_processor',
+            'stream': 'sys.stdout',
+            'filters': ['mask_secrets'],
+        },
     },
     'loggers': {
         'airflow.processor': {
-            'handlers': ['processor'],
+            'handlers': ['processor_to_stdout' if DAG_PROCESSOR_LOG_TARGET == "stdout" else 'processor'],
             'level': LOG_LEVEL,
             'propagate': False,
         },
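Note on the wiring above: the handler choice is made once, when airflow_local_settings.py is imported, via a conditional expression inside the logging dictConfig. A minimal stdlib-only sketch of the same pattern follows; logging.StreamHandler and logging.FileHandler stand in for Airflow's RedirectStdHandler and file-based processor handler, and the target value and log file name are illustrative assumptions, not Airflow defaults.

# sketch_dag_processor_logging.py - stdlib-only illustration, not Airflow code
import logging
import logging.config

# Stand-ins for the values Airflow reads from the [logging] config section.
DAG_PROCESSOR_LOG_TARGET = "stdout"  # or "file"
DAG_PROCESSOR_LOG_FORMAT = (
    "[%(asctime)s] [SOURCE:DAG_PROCESSOR] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s"
)

LOGGING_CONFIG = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'source_processor': {'format': DAG_PROCESSOR_LOG_FORMAT},
    },
    'handlers': {
        # Plain StreamHandler stands in for Airflow's RedirectStdHandler.
        'processor_to_stdout': {
            'class': 'logging.StreamHandler',
            'formatter': 'source_processor',
            'stream': 'ext://sys.stdout',
        },
        # Plain FileHandler stands in for the file-based processor handler.
        'processor': {
            'class': 'logging.FileHandler',
            'formatter': 'source_processor',
            'filename': 'dag_processor.log',  # illustrative path
        },
    },
    'loggers': {
        'airflow.processor': {
            # The same one-or-the-other selection the patch performs.
            'handlers': ['processor_to_stdout' if DAG_PROCESSOR_LOG_TARGET == "stdout" else 'processor'],
            'level': 'INFO',
            'propagate': False,
        },
    },
}

logging.config.dictConfig(LOGGING_CONFIG)
logging.getLogger('airflow.processor').info("DAG file parsed")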
15 changes: 15 additions & 0 deletions airflow/config_templates/config.yml
@@ -659,6 +659,21 @@
       type: string
       example: ~
       default: "%%(asctime)s %%(levelname)s - %%(message)s"
+    - name: dag_processor_log_target
+      description: Where to send dag parser logs. If "file",
+        logs are sent to log files defined by child_process_log_directory.
+      version_added: 2.4.0
+      type: string
+      example: ~
+      default: "file"
+    - name: dag_processor_log_format
+      description: |
+        Format of Dag Processor Log line
+      version_added: 2.3.4
+      type: string
+      example: ~
+      default: "[%%(asctime)s] [SOURCE:DAG_PROCESSOR]
+        {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s"
     - name: log_formatter_class
       description: ~
       version_added: 2.3.4
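For trying the new option out, Airflow's standard AIRFLOW__{SECTION}__{KEY} environment-variable override avoids editing airflow.cfg. A sketch, assuming a working Airflow installation:

import os

# Must be set before the scheduler / DAG processor starts, and before
# airflow.configuration is imported (hence the late import below).
os.environ["AIRFLOW__LOGGING__DAG_PROCESSOR_LOG_TARGET"] = "stdout"

from airflow.configuration import conf

# The same accessor the patch uses when picking the handler.
assert conf.get_mandatory_value("logging", "dag_processor_log_target") == "stdout"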
6 changes: 6 additions & 0 deletions airflow/config_templates/default_airflow.cfg
@@ -366,6 +366,12 @@ colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
 # Format of Log line
 log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
 simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
+
+# Where to send dag parser logs. If "file", logs are sent to log files defined by child_process_log_directory.
+dag_processor_log_target = file
+
+# Format of Dag Processor Log line
+dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
 log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware
 
 # Specify prefix pattern like mentioned below with stream handler TaskHandlerWithCustomFormatter
55 changes: 31 additions & 24 deletions airflow/dag_processing/processor.py
@@ -143,31 +143,38 @@ def _run_file_processor(
 
         set_context(log, file_path)
         setproctitle(f"airflow scheduler - DagFileProcessor {file_path}")
+
+        def _handle_dag_file_processing():
+            # Re-configure the ORM engine as there are issues with multiple processes
+            # settings.configure_orm()
+
+            # Change the thread name to differentiate log lines. This is
+            # really a separate process, but changing the name of the
+            # process doesn't work, so changing the thread name instead.
+            threading.current_thread().name = thread_name
+
+            log.info("Started process (PID=%s) to work on %s", os.getpid(), file_path)
+            dag_file_processor = DagFileProcessor(dag_ids=dag_ids, dag_directory=dag_directory, log=log)
+            result: Tuple[int, int] = dag_file_processor.process_file(
+                file_path=file_path,
+                pickle_dags=pickle_dags,
+                callback_requests=callback_requests,
+            )
+            result_channel.send(result)
+
         try:
-            # redirect stdout/stderr to log
-            with redirect_stdout(StreamLogWriter(log, logging.INFO)), redirect_stderr(
-                StreamLogWriter(log, logging.WARN)
-            ), Stats.timer() as timer:
-                # Re-configure the ORM engine as there are issues with multiple processes
-                settings.configure_orm()
-
-                # Change the thread name to differentiate log lines. This is
-                # really a separate process, but changing the name of the
-                # process doesn't work, so changing the thread name instead.
-                threading.current_thread().name = thread_name
-
-                log.info("Started process (PID=%s) to work on %s", os.getpid(), file_path)
-                dag_file_processor = DagFileProcessor(
-                    dag_ids=dag_ids,
-                    dag_directory=dag_directory,
-                    log=log,
-                )
-                result: Tuple[int, int] = dag_file_processor.process_file(
-                    file_path=file_path,
-                    pickle_dags=pickle_dags,
-                    callback_requests=callback_requests,
-                )
-                result_channel.send(result)
+            DAG_PROCESSOR_LOG_TARGET = conf.get_mandatory_value('logging', 'DAG_PROCESSOR_LOG_TARGET')
+            if DAG_PROCESSOR_LOG_TARGET == "stdout":
+                with Stats.timer() as timer:
+                    _handle_dag_file_processing()
+            else:
+                # The following line ensures that stdout goes to the same destination as the logs. If stdout
+                # gets sent to logs and logs are sent to stdout, this leads to an infinite loop. This
+                # necessitates this conditional based on the value of DAG_PROCESSOR_LOG_TARGET.
+                with redirect_stdout(StreamLogWriter(log, logging.INFO)), redirect_stderr(
+                    StreamLogWriter(log, logging.WARN)
+                ), Stats.timer() as timer:
+                    _handle_dag_file_processing()
             log.info("Processing %s took %.3f seconds", file_path, timer.duration)
         except Exception:
             # Log exceptions through the logging framework.
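The comment in the else branch carries the key reasoning: once the 'airflow.processor' logger itself writes to stdout, redirecting stdout into a StreamLogWriter would feed the logger's output back to itself. A distilled sketch of that conditional, where run_with_log_target and work are illustrative names rather than Airflow API:

import logging
from contextlib import ExitStack, redirect_stderr, redirect_stdout

# StreamLogWriter is the same adapter the patch uses; module path per the diff.
from airflow.utils.log.logging_mixin import StreamLogWriter


def run_with_log_target(log: logging.Logger, target: str, work) -> None:
    # When the logger already targets stdout, leave stdout/stderr untouched;
    # otherwise capture print()/traceback output into the logger.
    with ExitStack() as stack:
        if target != "stdout":
            stack.enter_context(redirect_stdout(StreamLogWriter(log, logging.INFO)))
            stack.enter_context(redirect_stderr(StreamLogWriter(log, logging.WARN)))
        work()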
48 changes: 47 additions & 1 deletion tests/dag_processing/test_processor.py
@@ -28,7 +28,7 @@
 from airflow.callbacks.callback_requests import TaskCallbackRequest
 from airflow.configuration import TEST_DAGS_FOLDER, conf
 from airflow.dag_processing.manager import DagFileProcessorAgent
-from airflow.dag_processing.processor import DagFileProcessor
+from airflow.dag_processing.processor import DagFileProcessor, DagFileProcessorProcess
 from airflow.models import DagBag, DagModel, SlaMiss, TaskInstance, errors
 from airflow.models.serialized_dag import SerializedDagModel
 from airflow.models.taskinstance import SimpleTaskInstance
@@ -788,6 +788,52 @@ def test_import_error_tracebacks_zip_depth(self, tmpdir):
         assert import_error.stacktrace == expected_stacktrace.format(invalid_dag_filename)
         session.rollback()
 
+    @conf_vars({("logging", "dag_processor_log_target"): "stdout"})
+    @mock.patch('airflow.dag_processing.processor.settings.dispose_orm', MagicMock)
+    @mock.patch('airflow.dag_processing.processor.redirect_stdout')
+    def test_dag_parser_output_when_logging_to_stdout(self, mock_redirect_stdout_for_file):
+        processor = DagFileProcessorProcess(
+            file_path='abc.txt',
+            pickle_dags=False,
+            dag_ids=[],
+            dag_directory=[],
+            callback_requests=[],
+        )
+        processor._run_file_processor(
+            result_channel=MagicMock(),
+            parent_channel=MagicMock(),
+            file_path="fake_file_path",
+            pickle_dags=False,
+            dag_ids=[],
+            thread_name="fake_thread_name",
+            callback_requests=[],
+            dag_directory=[],
+        )
+        mock_redirect_stdout_for_file.assert_not_called()
+
+    @conf_vars({("logging", "dag_processor_log_target"): "file"})
+    @mock.patch('airflow.dag_processing.processor.settings.dispose_orm', MagicMock)
+    @mock.patch('airflow.dag_processing.processor.redirect_stdout')
+    def test_dag_parser_output_when_logging_to_file(self, mock_redirect_stdout_for_file):
+        processor = DagFileProcessorProcess(
+            file_path='abc.txt',
+            pickle_dags=False,
+            dag_ids=[],
+            dag_directory=[],
+            callback_requests=[],
+        )
+        processor._run_file_processor(
+            result_channel=MagicMock(),
+            parent_channel=MagicMock(),
+            file_path="fake_file_path",
+            pickle_dags=False,
+            dag_ids=[],
+            thread_name="fake_thread_name",
+            callback_requests=[],
+            dag_directory=[],
+        )
+        mock_redirect_stdout_for_file.assert_called_once()
+
 
 class TestProcessorAgent:
     @pytest.fixture(autouse=True)