# Problem Statement

The `fast.ai` library has a callback that tracks the history of training metrics. However, this history is only reported to the console or to a Jupyter widget, and there is no callback that stores the results in CSV format. In this notebook, the author proposes a callback similar to [CSVLogger from the Keras library](https://github.com/keras-team/keras/blob/master/keras/callbacks.py#L1135), which saves the tracked metrics to a persistent file.

In [1]:
%reload_ext autoreload

In [2]:
%autoreload 2

In [3]:
from fastai import *
from fastai.torch_core import *
from fastai.vision import *
from fastai.metrics import *
from torchvision.models import resnet18

In [4]:
@dataclass
class CSVLogger(LearnerCallback):
    """A `LearnerCallback` that saves history of training metrics into CSV file.

    The log is written to `learn.path/<filename>.csv`. The file is (re)created
    when training begins, receives one header line taken from
    `learn.recorder.names`, then one line per epoch, and is closed when
    training ends.
    """
    filename: str = 'history'

    def __post_init__(self):
        # Only resolve the target path here; nothing is touched on disk until
        # training actually starts.
        self.path = self.learn.path/f'{self.filename}.csv'
        self.file = None

    @property
    def header(self):
        "Column names of the CSV file, as reported by the learner's recorder."
        return self.learn.recorder.names

    def read_logged_file(self):
        "Read the logged CSV file back as a `pandas.DataFrame`."
        return pd.read_csv(self.path)

    def on_train_begin(self, metrics_names: StrList, **kwargs: Any) -> None:
        "Create the log file (and its parent folders) and write the header line."
        # Release a handle left open by an earlier run that never reached
        # `on_train_end`, so that it is not leaked when we reopen the file.
        self._close_file()
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.file = self.path.open('w')
        self.file.write(','.join(self.header) + '\n')

    def on_epoch_end(self, epoch: int, smooth_loss: Tensor, last_metrics: MetricsList, **kwargs: Any) -> None:
        "Append one line with `epoch`, `smooth_loss` and `last_metrics` to the log."
        # `last_metrics` may be None (nothing was computed for this epoch);
        # list concatenation with None would raise TypeError.
        metrics = [] if last_metrics is None else list(last_metrics)
        self.write_stats([epoch, smooth_loss] + metrics)

    def on_train_end(self, **kwargs: Any) -> None:
        "Flush and close the log file, if one was opened."
        self._close_file()

    def write_stats(self, stats: TensorOrNumList) -> None:
        """Write `stats` as a single comma-separated line.

        Integers are written verbatim, missing values (`None`) as an empty
        field, and everything else with six decimal places. Values beyond the
        number of header columns are dropped so rows stay aligned with the
        header.
        """
        fields = [self._format_stat(stat) for _, stat in zip(self.header, stats)]
        self.file.write(','.join(fields) + '\n')

    @staticmethod
    def _format_stat(stat) -> str:
        "Format a single value for the CSV file."
        if stat is None: return ''
        if isinstance(stat, int): return str(stat)
        return f'{stat:.6f}'

    def _close_file(self) -> None:
        "Close the log file when it is open; `close()` flushes pending writes."
        # The (closed) handle is kept on `self.file` so callers can still
        # inspect `self.file.closed` after training.
        if self.file is not None and not self.file.closed:
            self.file.close()

## Example

Let's train an MNIST classifier and track its metrics. All the metrics listed in the `metrics` array, as well as the epoch number and the train and valid losses, should be saved to the file. We can then read this file back and process it further.

In [5]:
# Location of the MNIST_TINY dataset (presumably downloaded and extracted on first use).
path = untar_data(URLs.MNIST_TINY)

In [6]:
# Build the image data bunch from the dataset folder.
data = ImageDataBunch.from_folder(path)

In [7]:
# Learner around a `simple_cnn` model; `accuracy` and `error_rate` are the metrics to be tracked.
learn = Learner(data, simple_cnn((3, 10, 10)), metrics=[accuracy, error_rate])

In [8]:
# Bind the logger to the learner; with the default `filename` the log goes to `learn.path/'history.csv'`.
cb = CSVLogger(learn)

In [9]:
# Train for 3 epochs with the CSV logger passed as a callback.
learn.fit(3, callbacks=[cb])

Total time: 00:02
epoch  train_loss  valid_loss  accuracy  error_rate
1      2.249624    2.172361    0.505007  0.494993    (00:00)
2      2.118121    1.730644    0.505007  0.494993    (00:00)
3      1.858596    1.108214    0.505007  0.494993    (00:00)



In [10]:
# Read the CSV file written during training back into a dataframe and display it.
log_df = cb.read_logged_file()
log_df

Unnamed: 0,epoch,train_loss,valid_loss,accuracy,error_rate
0,1,2.249624,2.172361,0.505007,0.494993
1,2,2.118121,1.730644,0.505007,0.494993
2,3,1.858596,1.108214,0.505007,0.494993


## Tests

The tests are in the [test_logger.py](./test_logger.py) file and can be invoked with the command:
```bash
$ python -m pytest test_logger.py
```

To keep all of the PR's code in a single place, here is the content of the aforementioned file:

In [11]:
from io import StringIO
from contextlib import redirect_stdout

import pytest
from fastai import *
from fastai.vision import *
from fastai.metrics import *
from fastprogress import fastprogress

from logger import CSVLogger


def test_callback_has_required_properties_after_init(classifier):
    "A freshly built logger is bound to its learner, has no open file and has created nothing on disk."
    logger = CSVLogger(classifier)

    # Same checks, in the same order: name set, no file yet, learner bound, handle unset.
    assert logger.filename
    assert not logger.path.exists()
    assert logger.learn is classifier
    assert logger.file is None


def test_callback_writes_learn_metrics_during_training(classifier_and_logger):
    "Training leaves a closed CSV file with one row per epoch and the recorder's names as columns."
    learner, logger = classifier_and_logger
    epochs = 3

    learner.fit(epochs, callbacks=[logger])

    history = logger.read_logged_file()
    assert logger.path.exists()
    assert logger.file.closed
    assert not history.empty
    assert len(history) == epochs
    assert learner.recorder.names == history.columns.tolist()


# We can drop this test if comparing the stdout progress output with the CSV content
# does not seem worthwhile.
def test_callback_written_metrics_are_equal_to_reported_via_stdout(classifier_and_logger, no_bar):
    "The logged CSV content equals the metrics table captured from stdout while training."
    learner, logger = classifier_and_logger
    epochs = 3

    # Capture everything the training loop prints so it can be parsed afterwards.
    captured = StringIO()
    with redirect_stdout(captured):
        learner.fit(epochs, callbacks=[logger])

    logged = logger.read_logged_file()
    printed = convert_into_dataframe(captured)
    pd.testing.assert_frame_equal(logged, printed)


def test_callback_written_metrics_are_equal_to_values_stored_in_reporter(classifier_and_logger):
    "The logged CSV content equals the per-epoch values kept by the learner's recorder."
    learner, logger = classifier_and_logger
    epochs = 3

    learner.fit(epochs, callbacks=[logger])

    logged = logger.read_logged_file()
    recorded = create_metrics_dataframe(learner)
    pd.testing.assert_frame_equal(logged, recorded)


@pytest.fixture
def classifier(tmpdir):
    "Learner with a `simple_cnn` model on MNIST_TINY whose `path` is a folder under pytest's `tmpdir`."
    data_dir = untar_data(URLs.MNIST_TINY)
    # A per-test folder, so that log files never leak between tests.
    work_dir = str(tmpdir.join('classifier'))
    return Learner(ImageDataBunch.from_folder(data_dir), simple_cnn((3, 10, 10)), path=work_dir)


@pytest.fixture
def classifier_and_logger(classifier):
    "The `classifier` learner with accuracy and error-rate metrics set, paired with a `CSVLogger` bound to it."
    classifier.metrics = [accuracy, error_rate]
    return classifier, CSVLogger(classifier)


@pytest.fixture
def no_bar():
    "Set `fastprogress.NO_BAR` for the duration of a test, then restore the previous setting."
    # Remember what was configured before the test instead of assuming False,
    # so a globally disabled bar stays disabled after the test finishes.
    previous = getattr(fastprogress, 'NO_BAR', False)
    fastprogress.NO_BAR = True
    yield
    fastprogress.NO_BAR = previous


def convert_into_dataframe(buffer):
    """Converts data captured from `fastprogress.ConsoleProgressBar` into dataframe.

    The first non-blank line of `buffer` is taken as the whitespace-separated
    header; every following non-blank line is parsed as a row of floats. The
    `epoch` column is cast to int.

    Raises `ValueError` when the buffer has no non-blank line, or when a value
    cannot be parsed as a float.
    """
    # Strip before filtering: a whitespace-only line would otherwise survive
    # the blank-line filter and turn into an all-NaN row.
    stripped = (line.strip() for line in buffer.getvalue().split('\n'))
    rows = [row for row in stripped if row]
    if not rows:
        raise ValueError('captured output is empty: no header line to parse')
    header, *body = rows
    names = header.split()
    values = [[float(x) for x in row.split()] for row in body]
    records = [dict(zip(names, row_values)) for row_values in values]
    df = pd.DataFrame(records, columns=names)
    df['epoch'] = df['epoch'].astype(int)
    return df


def create_metrics_dataframe(learn):
    "Builds a dataframe with one row per epoch from the values kept in `learn.recorder`."
    per_epoch = zip(
        get_train_losses(learn),
        learn.recorder.val_losses,
        learn.recorder.metrics)
    rows = []
    # Epoch numbers start at 1.
    for epoch, (train_loss, valid_loss, metric_values) in enumerate(per_epoch, 1):
        rows.append([epoch, train_loss, valid_loss, *metric_values])
    return pd.DataFrame(rows, columns=learn.recorder.names)


def get_train_losses(learn):
    "Returns the last recorded training loss of each chunk of `len(learn.data.train_dl)` losses."
    scalar_losses = [to_np(loss).item() for loss in learn.recorder.losses]
    # Length of the training loader - presumably the number of batches per epoch.
    chunk_size = len(learn.data.train_dl)
    chunks = partition(scalar_losses, chunk_size)
    return [chunk[-1] for chunk in chunks]