From 5cc722f1e64af1e5d2955132ae8b76b74d5c1f63 Mon Sep 17 00:00:00 2001
From: chico
Date: Fri, 13 Nov 2020 16:28:10 +0100
Subject: [PATCH 1/6] logging server

---
 autosklearn/automl.py                       |  56 +++++++-
 autosklearn/ensemble_builder.py             |  54 +++-----
 autosklearn/util/logging_.py                | 139 +++++++++++++++++++-
 test/test_ensemble_builder/test_ensemble.py |   2 -
 4 files changed, 210 insertions(+), 41 deletions(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 59ee4c119c..b62795bc00 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -3,6 +3,7 @@
 import io
 import json
 import platform
+import logging.handlers
 import os
 import sys
 import time
@@ -10,6 +11,7 @@
 import unittest.mock
 import warnings
 import tempfile
+import threading

 from ConfigSpace.read_and_write import json as cs_json
 import dask.distributed
@@ -39,7 +41,12 @@
 from autosklearn.metrics import calculate_score
 from autosklearn.util.backend import Backend
 from autosklearn.util.stopwatch import StopWatch
-from autosklearn.util.logging_ import get_logger, setup_logger
+from autosklearn.util.logging_ import (
+    get_logger,
+    is_port_in_use,
+    LogRecordSocketReceiver,
+    setup_logger,
+)
 from autosklearn.util import pipeline, RE_PATTERN
 from autosklearn.ensemble_builder import EnsembleBuilderManager
 from autosklearn.ensembles.singlebest_ensemble import SingleBest
@@ -258,12 +265,45 @@ def _close_dask_client(self):

     def _get_logger(self, name):
         logger_name = 'AutoML(%d):%s' % (self._seed, name)
+
+        # Set up the configuration for the logger.
+        # It will be honored by the server
+        # that is created below.
         setup_logger(os.path.join(self._backend.temporary_directory,
                                   '%s.log' % str(logger_name)),
                      self.logging_config,
                      )
+
+        # The desired port might be in use, so check this
+        while is_port_in_use(self._logger_port):
+            self._logger_port += 1
+
+        # As Auto-sklearn works with distributed processes,
+        # we implement a logging server that can receive TCP
+        # pickled messages. They are unpickled and processed locally
+        # under the above logging configuration settings.
+        # We need to specify the logger_name so that received records
+        # are handled under the logger_name logger configuration.
+        self.stop_logging_server = threading.Event()
+        self.logger_tcpserver = LogRecordSocketReceiver(logname=logger_name,
+                                                        port=self._logger_port,
+                                                        event=self.stop_logging_server)
+        self.logging_server = threading.Thread(target=self.logger_tcpserver.serve_until_stopped)
+        self.logging_server.daemon = False
+        self.logging_server.start()
         return get_logger(logger_name)

+    def _clean_logger(self):
+        if not hasattr(self, 'stop_logging_server') or self.stop_logging_server is None:
+            return
+
+        # Clean up the logger
+        if self.logging_server.is_alive():
+            self.stop_logging_server.set()
+            self.logging_server.join()
+        del self.logger_tcpserver
+        del self.stop_logging_server
+
     @staticmethod
     def _start_task(watcher, task_name):
         watcher.start_task(task_name)
@@ -403,6 +443,8 @@ def fit(
         self._dataset_name = dataset_name
         self._stopwatch.start_task(self._dataset_name)

+        # By default try to use the default TCP logging port; get a new port if it is in use
+        self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT
         self._logger = self._get_logger(dataset_name)

         if feat_type is not None and len(feat_type) != X.shape[1]:
@@ -589,8 +631,8 @@ def fit(
                 max_iterations=None,
                 read_at_most=np.inf,
                 ensemble_memory_limit=self._memory_limit,
-                logger_name=self._logger.name,
                 random_state=self._seed,
+                logger_port=self._logger_port,
             )
             self._stopwatch.stop_task(ensemble_task_name)
@@ -694,6 +736,9 @@ def fit(
             self._load_models()
         self._close_dask_client()

+        # Clean up the logger
+        self._clean_logger()
+
         return self

     def refit(self, X, y):
@@ -866,7 +911,7 @@ def fit_ensemble(self, y, task=None, precision=32,
             read_at_most=np.inf,
             ensemble_memory_limit=self._memory_limit,
             random_state=self._seed,
-            logger_name=self._logger.name,
+            logger_port=self._logger_port,
         )
         manager.build_ensemble(self._dask_client)
         future = manager.futures.pop()
@@ -1157,9 +1202,14 @@ def configuration_space_created_hook(self, datamanager, configuration_space):
     def __getstate__(self) -> Dict[str, Any]:
         # Cannot serialize a client!
         self._dask_client = None
+        self.logging_server = None
+        self.stop_logging_server = None
         return self.__dict__

     def __del__(self):
+        # Clean up the logger
+        self._clean_logger()
+
         self._close_dask_client()

         # When a multiprocessing work is done, the
diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py
index 92c50a9995..2789ea361e 100644
--- a/autosklearn/ensemble_builder.py
+++ b/autosklearn/ensemble_builder.py
@@ -3,6 +3,7 @@
 import gzip
 import math
 import numbers
+import logging.handlers
 import os
 import pickle
 import re
@@ -27,7 +28,7 @@
 from autosklearn.metrics import calculate_score, Scorer
 from autosklearn.ensembles.ensemble_selection import EnsembleSelection
 from autosklearn.ensembles.abstract_ensemble import AbstractEnsemble
-from autosklearn.util.logging_ import get_logger, setup_logger
+from autosklearn.util.logging_ import get_named_client_logger

 Y_ENSEMBLE = 0
 Y_VALID = 1
@@ -54,7 +55,7 @@ def __init__(
         read_at_most: int,
         ensemble_memory_limit: Optional[int],
         random_state: int,
-        logger_name: str,
+        logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
     ):
         """
             SMAC callback to handle ensemble building
@@ -100,8 +101,8 @@ def __init__(
             memory limit in mb. If ``None``, no memory limit is enforced.
        read_at_most: int
            read at most n new prediction files in each iteration
-        logger_name: str
-            Name of the logger where we are gonna write information
+        logger_port: int
+            Port where the logging server listens for records.

        Returns
        -------
@@ -124,7 +125,7 @@ def __init__(
        self.read_at_most = read_at_most
        self.ensemble_memory_limit = ensemble_memory_limit
        self.random_state = random_state
-        self.logger_name = logger_name
+        self.logger_port = logger_port

        # Store something similar to SMAC's runhistory
        self.history = []
@@ -152,11 +153,7 @@ def build_ensemble(self, dask_client: dask.distributed.Client) -> None:
        # The second criteria is elapsed time
        elapsed_time = time.time() - self.start_time

-        logger = EnsembleBuilder._get_ensemble_logger(
-            self.logger_name,
-            self.backend.temporary_directory,
-            False
-        )
+        logger = EnsembleBuilder._get_ensemble_logger(self.logger_port)

        # First test for termination conditions
        if self.time_left_for_ensembles < elapsed_time:
@@ -216,11 +213,11 @@ def build_ensemble(self, dask_client: dask.distributed.Client) -> None:
                    memory_limit=self.ensemble_memory_limit,
                    read_at_most=self.read_at_most,
                    random_state=self.seed,
-                    logger_name=self.logger_name,
                    end_at=self.start_time + self.time_left_for_ensembles,
                    iteration=self.iteration,
                    return_predictions=False,
                    priority=100,
+                    logger_port=self.logger_port,
                ))

                logger.info(
@@ -254,10 +251,10 @@ def fit_and_return_ensemble(
    memory_limit: Optional[int],
    read_at_most: int,
    random_state: int,
-    logger_name: str,
    end_at: float,
    iteration: int,
    return_predictions: bool,
+    logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
) -> Tuple[
        List[Tuple[int, float, float, float]],
        int,
@@ -305,8 +302,6 @@ def fit_and_return_ensemble(
        memory limit in mb. If ``None``, no memory limit is enforced.
    read_at_most: int
        read at most n new prediction files in each iteration
-    logger_name: str
-        Name of the logger where we are gonna write information
    end_at: float
        At what time the job must finish. Needs to be the endtime and not the time
        left because we do not know when dask schedules the job.
@@ -333,7 +328,7 @@ def fit_and_return_ensemble(
        memory_limit=memory_limit,
        read_at_most=read_at_most,
        random_state=random_state,
-        logger_name=logger_name,
+        logger_port=logger_port,
    ).run(
        end_at=end_at,
        iteration=iteration,
@@ -358,7 +353,7 @@ def __init__(
            memory_limit: Optional[int] = 1024,
            read_at_most: int = 5,
            random_state: Optional[Union[int, np.random.RandomState]] = None,
-            logger_name: str = 'ensemble_builder',
+            logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
    ):
        """
            Constructor
@@ -404,8 +399,8 @@ def __init__(
                memory limit in mb. If ``None``, no memory limit is enforced.
            read_at_most: int
                read at most n new prediction files in each iteration
-            logger_name: str
-                Name of the logger where we are gonna write information
+            logger_port: int
+                Port where the logging server listens for records.
        """

        super(EnsembleBuilder, self).__init__()
@@ -446,9 +441,8 @@ def __init__(
        self.random_state = check_random_state(random_state)

        # Setup the logger
-        self.logger_name = logger_name
-        self.logger = self._get_ensemble_logger(
-            self.logger_name, self.backend.temporary_directory, False)
+        self.logger_port = logger_port
+        self.logger = self._get_ensemble_logger(self.logger_port)

        if ensemble_nbest == 1:
            self.logger.debug("Behaviour depends on int/float: %s, %s (ensemble_nbest, type)" %
@@ -536,21 +530,13 @@ def __init__(
        self.ensemble_history = []

    @classmethod
-    def _get_ensemble_logger(self, logger_name, dirname, setup):
+    def _get_ensemble_logger(self, port: int):
        """
        Returns the logger of for the ensemble process.
        A subprocess will require to set this up, for instance,
        pynisher forks
        """
-        if setup:
-            setup_logger(
-                os.path.join(
-                    dirname,
-                    '%s.log' % str(logger_name)
-                ),
-            )
-
-        return get_logger('EnsembleBuilder')
+        return get_named_client_logger('EnsembleBuilder', port=port)

    def run(
        self,
@@ -566,8 +552,7 @@ def run(
        elif time_left is not None and end_at is not None:
            raise ValueError('Cannot provide both time_left and end_at.')

-        self.logger = self._get_ensemble_logger(
-            self.logger_name, self.backend.temporary_directory, True)
+        self.logger = self._get_ensemble_logger(self.logger_port)

        process_start_time = time.time()
        while True:
@@ -636,8 +621,7 @@ def main(self, time_left, iteration, return_predictions):
        # Pynisher jobs inside dask 'forget'
        # the logger configuration. So we have to set it up
        # accordingly
-        self.logger = self._get_ensemble_logger(
-            self.logger_name, self.backend.temporary_directory, False)
+        self.logger = self._get_ensemble_logger(self.logger_port)

        self.start_time = time.time()
        train_pred, valid_pred, test_pred = None, None, None
diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py
index 78295b98fe..d310cd698c 100644
--- a/autosklearn/util/logging_.py
+++ b/autosklearn/util/logging_.py
@@ -1,8 +1,15 @@
 # -*- encoding: utf-8 -*-
 import logging
 import logging.config
+import logging.handlers
 import os
-from typing import Any, Dict, Optional
+import pickle
+import select
+import socket
+import socketserver
+import struct
+import threading
+from typing import Any, Dict, Optional, Type

 import yaml
@@ -33,6 +40,58 @@ def get_logger(name: str) -> 'PickableLoggerAdapter':
     return logger


+def is_port_in_use(port: int) -> bool:
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        return s.connect_ex(('localhost', port)) == 0
+
+
+def get_named_client_logger(name: str, host: str = 'localhost',
+                            port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT
+                            ) -> logging.Logger:
+    """
+    When working with a logging server, clients are expected to create a logger using
+    this method. For example, the automl object will create a master that awaits
+    records sent through TCP to localhost.
+
+    Ensemble builder will then instantiate a logger object that will submit records
+    via a socket handler to the server.
+
+    We do not need to set any format, as the server will render the message as
+    it needs to.
+
+    Parameters
+    ----------
+    name: (str)
+        the name of the logger, used to tag the messages in the main log
+    host: (str)
+        Address where the server listens for messages
+
+    Returns
+    -------
+    local_logger: a logger object that has a socket handler
+    """
+    local_logger = logging.getLogger(name)
+
+    # Remove any handler, so that the server handles
+    # how to process the message
+    local_logger.handlers.clear()
+
+    # We also need to remove propagate, else the logger
+    # will use the ROOT configuration. That is, automl sets
+    # a default ROOT configuration to print to the log file
+    # Then we also set another handler. If two handlers (the
+    # ROOT handler and our new socket) are available, we print
+    # twice to the log file
+    local_logger.propagate = False
+
+    socketHandler = logging.handlers.SocketHandler(
+        'localhost',
+        port
+    )
+    local_logger.addHandler(socketHandler)
+    return local_logger
+
+
 class PickableLoggerAdapter(object):

     def __init__(self, name: str):
@@ -86,3 +145,81 @@ def log(self, level: int, msg: str, *args: Any, **kwargs: Any) -> None:

     def isEnabledFor(self, level: int) -> bool:
         return self.logger.isEnabledFor(level)
+
+
+class LogRecordStreamHandler(socketserver.StreamRequestHandler):
+    """Handler for a streaming logging request.
+
+    This basically logs the record using whatever logging policy is
+    configured locally.
+    """
+
+    def handle(self) -> None:
+        """
+        Handle multiple requests - each expected to be a 4-byte length,
+        followed by the LogRecord in pickle format. Logs the record
+        according to whatever policy is configured locally.
+        """
+        while True:
+            chunk = self.connection.recv(4)
+            if len(chunk) < 4:
+                break
+            slen = struct.unpack('>L', chunk)[0]
+            chunk = self.connection.recv(slen)
+            while len(chunk) < slen:
+                chunk = chunk + self.connection.recv(slen - len(chunk))
+            obj = self.unPickle(chunk)
+            record = logging.makeLogRecord(obj)
+            self.handleLogRecord(record)
+
+    def unPickle(self, data: Any) -> Any:
+        return pickle.loads(data)
+
+    def handleLogRecord(self, record: logging.LogRecord) -> None:
+        # logname is defined in LogRecordSocketReceiver,
+        # yet mypy cannot see this. It is needed so that we can
+        # re-use the auto-sklearn logging setup for the received
+        # records
+        if self.server.logname is not None:  # type: ignore  # noqa
+            name = self.server.logname  # type: ignore  # noqa
+        else:
+            name = record.name
+        logger = logging.getLogger(name)
+        # N.B. EVERY record gets logged. This is because Logger.handle
+        # is normally called AFTER logger-level filtering. If you want
+        # to do filtering, do it at the client end to save wasting
+        # cycles and network bandwidth!
+        logger.handle(record)
+
+
+class LogRecordSocketReceiver(socketserver.ThreadingTCPServer):
+    """
+    This class implements an entity that receives TCP messages on a given address.
+    For further information, please check
+    https://docs.python.org/3/howto/logging-cookbook.html#configuration-server-example
+    """
+
+    allow_reuse_address = True
+
+    def __init__(self,
+                 host: str = 'localhost',
+                 port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
+                 handler: Type[LogRecordStreamHandler] = LogRecordStreamHandler,
+                 logname: Optional[str] = None,
+                 event: threading.Event = None,
+                 ):
+        socketserver.ThreadingTCPServer.__init__(self, (host, port), handler)
+        self.timeout = 1
+        self.logname = logname
+        self.event = event
+
+    def serve_until_stopped(self) -> None:
+        while True:
+            rd, wr, ex = select.select([self.socket.fileno()],
+                                       [], [],
+                                       self.timeout)
+            if rd:
+                self.handle_request()
+
+            if self.event is not None and self.event.is_set():
+                break
diff --git a/test/test_ensemble_builder/test_ensemble.py b/test/test_ensemble_builder/test_ensemble.py
index 03633b5174..31aa1f4166 100644
--- a/test/test_ensemble_builder/test_ensemble.py
+++ b/test/test_ensemble_builder/test_ensemble.py
@@ -749,7 +749,6 @@ def test_ensemble_builder_process_realrun(dask_client, ensemble_backend):
        read_at_most=np.inf,
        ensemble_memory_limit=None,
        random_state=0,
-        logger_name='Ensemblebuilder',
    )
    manager.build_ensemble(dask_client)
    future = manager.futures.pop()
@@ -789,7 +788,6 @@ def test_ensemble_builder_nbest_remembered(fit_ensemble, ensemble_backend, dask_
        read_at_most=np.inf,
        ensemble_memory_limit=1000,
        random_state=0,
-        logger_name='Ensemblebuilder',
        max_iterations=None,
    )

From 0c55d5d1eaf3d16c22f9e82c3a3ce3c53478cf3f Mon Sep 17 00:00:00 2001
From: chico
Date: Tue, 17 Nov 2020 14:37:42 +0100
Subject: [PATCH 2/6] multiprocessing in server

---
 autosklearn/automl.py           | 17 +++++++++++++----
 autosklearn/ensemble_builder.py | 19 ++++++-------------
 autosklearn/util/logging_.py    |  1 +
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index b62795bc00..370198dc2a 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -4,6 +4,7 @@
 import json
 import platform
 import logging.handlers
+import multiprocessing
 import os
 import sys
 import time
@@ -11,7 +12,6 @@
 import unittest.mock
 import warnings
 import tempfile
-import threading

 from ConfigSpace.read_and_write import json as cs_json
 import dask.distributed
@@ -284,11 +284,12 @@ def _get_logger(self, name):
        # under the above logging configuration settings.
        # We need to specify the logger_name so that received records
        # are handled under the logger_name logger configuration.
-        self.stop_logging_server = threading.Event()
+        self.stop_logging_server = multiprocessing.Event()
        self.logger_tcpserver = LogRecordSocketReceiver(logname=logger_name,
                                                        port=self._logger_port,
                                                        event=self.stop_logging_server)
-        self.logging_server = threading.Thread(target=self.logger_tcpserver.serve_until_stopped)
+        self.logging_server = multiprocessing.Process(
+            target=self.logger_tcpserver.serve_until_stopped)
        self.logging_server.daemon = False
        self.logging_server.start()
        return get_logger(logger_name)
@@ -300,7 +301,15 @@ def _clean_logger(self):
        # Clean up the logger
        if self.logging_server.is_alive():
            self.stop_logging_server.set()
-            self.logging_server.join()
+
+            # After sending the stop event, we try to
+            # join the process so that it can exit
+            # gracefully. If something goes wrong
+            # while waiting for the process to
+            # finish, we fall back to terminate()
+            # to kill the process.
+            self.logging_server.join(timeout=5)
+            self.logging_server.terminate()
        del self.logger_tcpserver
        del self.stop_logging_server

diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py
index 2789ea361e..0a344e96e2 100644
--- a/autosklearn/ensemble_builder.py
+++ b/autosklearn/ensemble_builder.py
@@ -153,7 +153,7 @@ def build_ensemble(self, dask_client: dask.distributed.Client) -> None:
        # The second criteria is elapsed time
        elapsed_time = time.time() - self.start_time

-        logger = EnsembleBuilder._get_ensemble_logger(self.logger_port)
+        logger = get_named_client_logger('EnsembleBuilder', port=self.logger_port)

        # First test for termination conditions
        if self.time_left_for_ensembles < elapsed_time:
@@ -307,6 +307,8 @@ def fit_and_return_ensemble(
        because we do not know when dask schedules the job.
    iteration: int
        The current iteration
+    logger_port: int
+        The port where the logging server is listening.

    Returns
    -------
@@ -442,7 +444,7 @@ def __init__(
        # Setup the logger
        self.logger_port = logger_port
-        self.logger = self._get_ensemble_logger(self.logger_port)
+        self.logger = get_named_client_logger('EnsembleBuilder', port=self.logger_port)

        if ensemble_nbest == 1:
            self.logger.debug("Behaviour depends on int/float: %s, %s (ensemble_nbest, type)" %
@@ -529,15 +531,6 @@ def __init__(
        del datamanager
        self.ensemble_history = []

-    @classmethod
-    def _get_ensemble_logger(self, port: int):
-        """
-        Returns the logger of for the ensemble process.
-        A subprocess will require to set this up, for instance,
-        pynisher forks
-        """
-        return get_named_client_logger('EnsembleBuilder', port=port)
-
    def run(
        self,
@@ -552,7 +545,7 @@ def run(
        elif time_left is not None and end_at is not None:
            raise ValueError('Cannot provide both time_left and end_at.')

-        self.logger = self._get_ensemble_logger(self.logger_port)
+        self.logger = get_named_client_logger('EnsembleBuilder', port=self.logger_port)

        process_start_time = time.time()
        while True:
@@ -621,7 +614,7 @@ def main(self, time_left, iteration, return_predictions):
        # Pynisher jobs inside dask 'forget'
        # the logger configuration. So we have to set it up
        # accordingly
-        self.logger = self._get_ensemble_logger(self.logger_port)
+        self.logger = get_named_client_logger('EnsembleBuilder', port=self.logger_port)

        self.start_time = time.time()
        train_pred, valid_pred, test_pred = None, None, None
diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py
index d310cd698c..aa9677e741 100644
--- a/autosklearn/util/logging_.py
+++ b/autosklearn/util/logging_.py
@@ -223,3 +223,4 @@ def serve_until_stopped(self) -> None:

            if self.event is not None and self.event.is_set():
                break
+        print("Finished the process")

From 738288226d073d2b1aa1739c85be0572bbb8e08b Mon Sep 17 00:00:00 2001
From: chico
Date: Wed, 18 Nov 2020 12:53:48 +0100
Subject: [PATCH 3/6] Removing push!

---
 autosklearn/util/logging_.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py
index aa9677e741..d310cd698c 100644
--- a/autosklearn/util/logging_.py
+++ b/autosklearn/util/logging_.py
@@ -223,4 +223,3 @@ def serve_until_stopped(self) -> None:

            if self.event is not None and self.event.is_set():
                break
-        print("Finished the process")

From 8e3097b86b664173e929039fc7c8c8a63329ec9a Mon Sep 17 00:00:00 2001
From: chico
Date: Wed, 18 Nov 2020 17:58:26 +0100
Subject: [PATCH 4/6] Pickable logging client!

---
 autosklearn/automl.py        |  7 +++++++
 autosklearn/util/logging_.py | 14 +++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 370198dc2a..2f724fd091 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -737,6 +737,13 @@ def fit(
        # while the ensemble builder tries to access the data
        if proc_ensemble is not None:
            self.ensemble_performance_history = list(proc_ensemble.history)
+
+            # save the ensemble performance history file
+            if len(self.ensemble_performance_history) > 0:
+                pd.DataFrame(
+                    self.ensemble_performance_history).to_csv(
+                        os.path.join(self._backend.internals_directory, 'ensemble_history.csv'))
+
            if len(proc_ensemble.futures) > 0:
                future = proc_ensemble.futures.pop()
                future.cancel()
diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py
index d310cd698c..4b3ae9061f 100644
--- a/autosklearn/util/logging_.py
+++ b/autosklearn/util/logging_.py
@@ -47,7 +47,7 @@ def is_port_in_use(port: int) -> bool:

 def get_named_client_logger(name: str, host: str = 'localhost',
                             port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT
-                            ) -> logging.Logger:
+                            ) -> 'PickableLoggerAdapter':
     """
     When working with a logging server, clients are expected to create a logger using
     this method. For example, the automl object will create a master that awaits
@@ -70,11 +70,11 @@ def get_named_client_logger(name: str, host: str = 'localhost',
     -------
     local_logger: a logger object that has a socket handler
     """
-    local_logger = logging.getLogger(name)
+    local_logger = PickableLoggerAdapter(name)

     # Remove any handler, so that the server handles
     # how to process the message
-    local_logger.handlers.clear()
+    local_logger.logger.handlers.clear()

     # We also need to remove propagate, else the logger
     # will use the ROOT configuration. That is, automl sets
     # a default ROOT configuration to print to the log file
     # Then we also set another handler. If two handlers (the
     # ROOT handler and our new socket) are available, we print
     # twice to the log file
-    local_logger.propagate = False
+    local_logger.logger.propagate = False

     socketHandler = logging.handlers.SocketHandler(
         'localhost',
         port
     )
-    local_logger.addHandler(socketHandler)
+    local_logger.logger.addHandler(socketHandler)
+
+    # Pynisher messages are debug, so we want to see them
+    local_logger.logger.setLevel(logging.DEBUG)
+
     return local_logger

From 0cd9c2db6299867c76a64f7853083a45e714a13e Mon Sep 17 00:00:00 2001
From: chico
Date: Wed, 18 Nov 2020 23:29:01 +0100
Subject: [PATCH 5/6] New file in outputs

---
 test/test_automl/test_automl.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_automl/test_automl.py b/test/test_automl/test_automl.py
index 6cc701976a..b6077269b0 100644
--- a/test/test_automl/test_automl.py
+++ b/test/test_automl/test_automl.py
@@ -308,6 +308,7 @@ def test_automl_outputs(backend, dask_client):
        'ensemble_read_scores.pkl',
        'runs',
        'ensembles',
+        'ensemble_history.csv',
    ]
    assert (
        sorted(os.listdir(os.path.join(backend.temporary_directory, '.auto-sklearn')))

From d06abf0a563e2eae088bbaad995b8be2a96cdad0 Mon Sep 17 00:00:00 2001
From: chico
Date: Thu, 19 Nov 2020 17:11:03 +0100
Subject: [PATCH 6/6] Added feedback from comments

---
 autosklearn/automl.py           |  5 ++---
 autosklearn/util/logging.yaml   |  4 ++++
 autosklearn/util/logging_.py    | 14 +++-----------
 test/test_automl/test_automl.py |  2 +-
 4 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 2f724fd091..520abf97c9 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -740,9 +740,8 @@ def fit(

            # save the ensemble performance history file
            if len(self.ensemble_performance_history) > 0:
-                pd.DataFrame(
-                    self.ensemble_performance_history).to_csv(
-                        os.path.join(self._backend.internals_directory, 'ensemble_history.csv'))
+                pd.DataFrame(self.ensemble_performance_history).to_json(
+                    os.path.join(self._backend.internals_directory, 'ensemble_history.json'))
diff --git a/autosklearn/util/logging.yaml b/autosklearn/util/logging.yaml
index ea97eee8ed..613154d228 100644
--- a/autosklearn/util/logging.yaml
+++ b/autosklearn/util/logging.yaml
@@ -44,3 +44,7 @@ loggers:
    smac.optimizer.smbo.SMBO:
        level: INFO
        handlers: [file_handler, console]
+
+    EnsembleBuilder:
+        level: DEBUG
+        propagate: no
diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py
index 4b3ae9061f..1e91783a64 100644
--- a/autosklearn/util/logging_.py
+++ b/autosklearn/util/logging_.py
@@ -70,29 +70,21 @@ def get_named_client_logger(name: str, host: str = 'localhost',
     -------
     local_logger: a logger object that has a socket handler
     """
+    # Set up the logger configuration
+    setup_logger()
+
     local_logger = PickableLoggerAdapter(name)

     # Remove any handler, so that the server handles
     # how to process the message
     local_logger.logger.handlers.clear()

-    # We also need to remove propagate, else the logger
-    # will use the ROOT configuration. That is, automl sets
-    # a default ROOT configuration to print to the log file
-    # Then we also set another handler. If two handlers (the
-    # ROOT handler and our new socket) are available, we print
-    # twice to the log file
-    local_logger.logger.propagate = False
-
     socketHandler = logging.handlers.SocketHandler(
         'localhost',
         port
     )
     local_logger.logger.addHandler(socketHandler)

-    # Pynisher messages are debug, so we want to see them
-    local_logger.logger.setLevel(logging.DEBUG)
-
     return local_logger
diff --git a/test/test_automl/test_automl.py b/test/test_automl/test_automl.py
index b6077269b0..29319a2234 100644
--- a/test/test_automl/test_automl.py
+++ b/test/test_automl/test_automl.py
@@ -308,7 +308,7 @@ def test_automl_outputs(backend, dask_client):
        'ensemble_read_scores.pkl',
        'runs',
        'ensembles',
-        'ensemble_history.csv',
+        'ensemble_history.json',
    ]
    assert (
        sorted(os.listdir(os.path.join(backend.temporary_directory, '.auto-sklearn')))
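Note: the following is a minimal, self-contained sketch (not part of the patches above) of the TCP logging pattern this series introduces: a threaded socket server unpickles length-prefixed LogRecords and hands them to a locally configured logger, while clients attach a plain logging.handlers.SocketHandler and ship their records over the wire. It uses only the Python standard library; all names ('server', the demo logger, port choice) are illustrative, not auto-sklearn API.

import logging
import logging.handlers
import pickle
import socketserver
import struct
import threading
import time


class RecordHandler(socketserver.StreamRequestHandler):
    """Unpickle length-prefixed LogRecords and hand them to a local logger."""

    def handle(self):
        while True:
            # SocketHandler sends each record as a 4-byte big-endian
            # length followed by a pickled LogRecord attribute dict.
            header = self.connection.recv(4)
            if len(header) < 4:
                break
            slen = struct.unpack('>L', header)[0]
            data = self.connection.recv(slen)
            while len(data) < slen:
                data += self.connection.recv(slen - len(data))
            record = logging.makeLogRecord(pickle.loads(data))
            # Handle the record with a fixed server-side logger, mirroring
            # the logname override in LogRecordSocketReceiver; in this
            # single-process demo it also avoids a feedback loop through
            # the client's SocketHandler.
            logging.getLogger('server').handle(record)


def main():
    logging.basicConfig(level=logging.DEBUG, format='%(name)s: %(message)s')
    server = socketserver.ThreadingTCPServer(('localhost', 0), RecordHandler)
    port = server.server_address[1]  # port 0: let the OS pick a free port
    threading.Thread(target=server.serve_forever, daemon=True).start()

    # Client side: no local handlers, records travel through the socket only.
    client = logging.getLogger('EnsembleBuilder')
    client.propagate = False
    client.setLevel(logging.DEBUG)
    client.addHandler(logging.handlers.SocketHandler('localhost', port))
    client.debug('hello from a (possibly forked) worker')

    time.sleep(0.5)  # give the handler thread a moment to process the record
    server.shutdown()


if __name__ == '__main__':
    main()

Binding to port 0 sidesteps the is_port_in_use probe-and-increment loop used in the patches; the OS hands back a free port, which the server then advertises to clients. The design point is the same in both cases: since pynisher/dask workers fork and lose handler configuration, the only thing a worker needs to know is a hostname and a port.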