In [None]:
from datetime import datetime
import os.path as op
import re

from IPython.display import Markdown, Latex
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pandas as pd
import yaml

# Load the CODECHECK configuration; every later cell reads its values
# (certificate id, paper metadata, manifest entries, ...) from `codecheck_conf`.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which can garble or reject non-ASCII text
# (e.g. author names) on systems whose default is not UTF-8.
with open('../codecheck.yml', encoding='utf-8') as f:
    codecheck_conf = yaml.safe_load(f)

def name_orcid(entry):
    """Format a person entry as ``Name (ORCID: [id](https://orcid.org/id))``.

    ``entry`` is a mapping with the keys ``'name'`` and ``'ORCID'``; the
    returned Markdown string links the ORCID iD to its orcid.org page.
    """
    person, orcid = entry['name'], entry['ORCID']
    return f"{person} (ORCID: [{orcid}](https://orcid.org/{orcid}))"

In [None]:
# Title block of the certificate, built from the manifest file:
# certificate id as top-level heading, the report URL (shown without its
# scheme) as a linked sub-heading, and the CODECHECK logo.
certificate_id = codecheck_conf['certificate']
report_url = codecheck_conf['report']
report_label = report_url.split('://')[1]
Markdown(
    f"# CODECHECK certificate {certificate_id}{{-}}\n"
    f"## [{report_label}]({report_url}) {{-}}\n"
    "[![CODECHECK logo](codecheck_logo.png)](https://codecheck.org.uk)"
)

## CODECHECK summary{-}

In [None]:
# Two-column Markdown table summarising the check; every value is read from
# the manifest. URLs are displayed without their scheme but link to the full URL.
summary_header = "\nItem | Value\n:--- | :----\n"

paper = codecheck_conf['paper']
paper_title = paper['title']
author_list = ', '.join(name_orcid(author) for author in paper['authors'])
paper_ref = paper['reference']
repo_url = codecheck_conf['repository']
checker = name_orcid(codecheck_conf['codechecker'])
check_date = datetime.fromisoformat(codecheck_conf['check_time']).date()
summary_text = codecheck_conf['summary'].strip()

summary_items = [
    ('Title', f"*{paper_title}*"),
    ('Authors', author_list),
    ('Reference', f"[{paper_ref.split('://')[1]}]({paper_ref})"),
    ('Repository', f"[{repo_url.split('://')[1]}]({repo_url})"),
    ('Codechecker', checker),
    ('Date of check', check_date),
    ('Summary', summary_text),
]
summary_rows = [f"{item} | {value}" for item, value in summary_items]
Markdown(summary_header + '\n'.join(summary_rows))

## Summary of output files generated{-}

In [None]:
# One table row per manifest entry: file name, author comment, size in bytes.
# Only the base name is shown to keep the table narrow; the size is read from
# the regenerated copy below `outputs/`.
files_header = "\nFile | Comment | Size\n:----| :------ | ---:\n"
files_rows = [
    ' | '.join([
        '`' + op.basename(item['file']) + '`',
        item.get('comment', ''),
        str(op.getsize(op.join('outputs', item['file']))),
    ])
    for item in codecheck_conf['manifest']
]
Markdown(files_header + '\n'.join(files_rows))

## Summary{-}

In [None]:
# Render the manifest's free-text summary as Markdown (surrounding whitespace stripped).
Markdown(codecheck_conf['summary'].strip())

## CODECHECKER notes{-}

### Workflow{-}
The original repository for the code was located at [github.com/tedinburgh/causality-review](https://github.com/tedinburgh/causality-review), and an earlier version had been archived at [zenodo.org/record/4657015](https://zenodo.org/record/4657015). 
I forked the repository at commit `010aa51a80d91857bea4f0aa33885183022ce59d` to [github.com/codecheckers/causality-review](https://github.com/codecheckers/causality-review) and started the CODECHECK. The original repository already contained a `codecheck.yml` MANIFEST, as well as a `README.md` file detailing the steps to run the code, a `requirements.txt` file stating the dependencies (with minimal versions), and a `codecheck-instructions.sh` script to automatically execute the steps detailed in the README file. The script can be downloaded individually; executing it will download the GitHub repository, set up a conda environment and run all the steps to reproduce the results. Since I had already cloned the full repository, I did not execute the script but instead only ran the steps following the cloning of the repository. As suggested by the authors, I only reproduced one part of the simulation results (linear processes), since re-running all the simulations would have taken too long. All other figures and data tables were regenerated from stored results also present in the repository (`simulation-data/`).

### Execution of the workflow{-}
I ran everything on a somewhat outdated workstation (Intel(R) Xeon(R) CPU E5-1630 v3 @ 3.70GHz, 16GB RAM) on Ubuntu Linux 18.04. The simulation time was 4 hours, comparable to the 3 hours stated by the authors. Regenerating the figures took only about 1 minute, significantly shorter than the "up to 15 minutes" suggested by the authors. Creating the figures emitted a number of warnings (see below), but none of them seemed to affect the output and all figures were created successfully.

#### Output from running `python causality-review-code/misc_ci.py`{-}

In [None]:
# Print the stored output of the figure-generation run verbatim.
# NOTE(review): `%cat` is an IPython shell alias that is only defined on POSIX systems.
%cat outputs/figures_err.txt

### Comparison of results with author repository{-}
By visual inspection, all regenerated figures are identical to the figures present in the repository. Comparison with `git diff-image` ([github.com/ewanmellor/git-diff-image](https://github.com/ewanmellor/git-diff-image)) showed minimal differences in some regions of the color plots of `hb_figure1.{pdf,eps}` and `hb_figure2.{pdf,eps}`, but these differences were not discernible to the naked eye and seem to reflect very minor numerical differences. Given that my figures were generated with matplotlib 3.3.4 (see package versions at the end of this document) and the authors generated figures with 3.3.2, I suspected this version difference to be the reason, but a cursory check with a downgraded matplotlib did not change the result.
The generated file `ul-transforms.txt` (underlying Table III in the paper) is identical to the file in the repository, except for some irrelevant differences between `0.000` and `-0.000`.

The simulation results for the linear process simulations stored in `lp_values.csv` differ slightly in columns 9–12 and 19–20, reflecting very minor numerical differences. After rounding all values to 10 decimal digits, the results were exactly identical.

Since the file `lp_times.csv` contains execution times measured during the run of this CODECHECK, it differs from the files provided by the authors. This also holds for the column representing the values for the linear processes simulations in file `computational-times.txt`. The results do seem comparable to the authors' results, though, and the order of the methods is preserved. See below for a graphical comparison:

In [None]:
def extract_lp_times(fname):
    """Read method names and run-time statistics from a LaTeX table body.

    Each non-blank line of ``fname`` is split on ``&``. The first cell is
    taken as the method name; the first two numbers found in the second cell
    are taken as mean and standard deviation of the run time (e.g.
    ``0.004 (0.001)``). Numbers are located with a regular expression rather
    than by fixed character positions, so values that are not exactly five
    characters wide (``12.345``, ``1e-3``, ...) are parsed correctly.

    Parameters
    ----------
    fname : str
        Path of the text file to read.

    Returns
    -------
    tuple of (list of str, list of float, list of float)
        Method names, mean times and standard deviations, in file order.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than two ``&``-separated cells, or its
        second cell does not contain at least two numbers.
    """
    number = re.compile(r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?')
    methods, means, stds = [], [], []
    with open(fname) as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                # tolerate blank lines (e.g. a trailing empty line)
                continue
            # only the first two columns are of interest
            cells = [cell.strip() for cell in line.split('&')[:2]]
            if len(cells) < 2:
                raise ValueError(
                    f"{fname}:{line_no}: expected at least two '&'-separated columns")
            method, run_times = cells
            values = number.findall(run_times)
            if len(values) < 2:
                raise ValueError(
                    f"{fname}:{line_no}: expected mean and std in {run_times!r}")
            methods.append(method)
            means.append(float(values[0]))
            stds.append(float(values[1]))
    return methods, means, stds
# Compare the authors' timing table with the one regenerated during this check.
orig_table = op.join('..', 'figures', 'computational-times.txt')
repr_table = op.join('outputs', 'figures', 'computational-times.txt')
orig_methods, orig_means, orig_stds = extract_lp_times(orig_table)
repr_methods, repr_means, repr_stds = extract_lp_times(repr_table)
# Both tables must list the same methods in the same order to share an x axis.
assert orig_methods == repr_methods

fig, ax = plt.subplots(figsize=(10, 5))
ax.set_yscale('log')
series = (
    ('original', orig_methods, orig_means, orig_stds),
    ('reproduction', repr_methods, repr_means, repr_stds),
)
for series_label, method_names, mean_times, std_times in series:
    ax.errorbar(method_names, mean_times, std_times, fmt='o', label=series_label)
ax.set_title('Computational requirements of linear process simulations (cf. first column of Table S.II in paper)')
_ = ax.legend()

### Comparison of results with arXiv preprint{-}
#### Table III{-}
The `codecheck.yml` manifest notes that the arXiv preprint has a small error in Table III in the baseline column for methods "TE (H)" and "ETE (H)", which I can confirm: the results in the repository state $\langle \mu \rangle = 0.675$ ("TE (H)") and $\langle \mu \rangle = 0.674$ ("ETE (H)"), whereas the paper states $\langle \mu \rangle = 0.673$ for both. However, I identified additional differences in the Gaussian noise column for the "NLGC" and "CCM" methods:

**Paper**

Method | $\sigma^2_G$ = 0.1 | $\sigma^2_G$ = 1 | $\sigma^2_G$ = 1
:------|-------------------:|-----------------:|----------------:
NLGC   |0.030               | 0.741            | -0.003
&nbsp; |0.972               | 1.335            | 2.313
CCM    |0.005               | 0.176            | -0.136
&nbsp; |0.981               | 0.986            | 0.951

**Repository (file `ul-transforms.txt`):**

Method | $\sigma^2_G$ = 0.1 | $\sigma^2_G$ = 1 | $\sigma^2_G$ = 1
:------|-------------------:|-----------------:|----------------:
NLGC   |0.031               | 0.740            | -0.007
&nbsp; |1.023               | 1.345            | 2.325
CCM    |0.013               | 0.151            | -0.075
&nbsp; |1.010               | 0.944            | 0.959
       
#### Figure 4{-}
The top part of Figure 4 (file `ul_figure.{pdf,eps}`) uses a different y axis scale for the EGC method in the repository file compared to the one included in the paper. As far as I can tell, the plotted values appear to be the same, i.e. it is just a question of "zoom level".

#### Figure S1{-}

There appears to be a small difference between the Figure S1 used in the arXiv preprint and the one in the repository (file `corr_transforms_plots.{pdf,eps}`). To confirm, I used the `pdfimages` tool to extract a png of the color plot from both the paper PDF and from the repository version, and plot them side by side:

In [None]:
# Show the two extracted versions of the Figure S1 colour plot next to each other.
paper_version = mpimg.imread('outputs/extracted_S1_paper.png')
repo_version = mpimg.imread('outputs/extracted_S1_repo.png')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 6))
panels = (
    (ax1, paper_version, 'paper version'),
    (ax2, repo_version, 'repository version'),
)
for panel, image, caption in panels:
    panel.imshow(image)
    panel.axis('off')  # pure image display, no ticks or frame
    panel.set_title(caption)

While the difference is small, it seems to be too big to be simply explained by e.g. a color conversion process (note the differences in the lower left corner).

## Recommendation to the authors{-}
Overall, the authors provide very thorough and easy-to-follow steps for reproduction, and make it conveniently possible to only reproduce parts of their study by calling the respective scripts with command line arguments. Apart from clearing up the minor discrepancies detailed in the report above, I only have a few minor recommendations:

* It would be preferable to have only one file for each figure instead of one PDF and one EPS version. Automatic treatment of the manifest file is also slightly impaired by the fact that the file comment is formally only attached to the EPS file entry but refers to both files.
* It would be helpful to clearly state if files are not expected to be reproduced exactly, e.g. if they represent measured execution times instead of calculated values (`lp_times.csv`, `computational-times.txt`).
* Long simulation runs (in this CODECHECK, the linear process simulations) would benefit from some indication of how much time (or how many iterations) is still needed to complete the run.
* The bold formatting in the tables (indicating e.g. minimum values per column) seems to have been added manually after the automatic generation of the tables. To avoid errors, it might make sense to have the code also take care of this highlighting.
* A very minor point: the `codecheck-instructions.sh` script contained in the repository is meant to be independent of the repository and starts by cloning it. It is unclear in what situation someone would have access to this script file but not have already cloned the repository. It might have been more straightforward to state in the README file to clone the repository, and then ask the user to execute the script file.

## Citing this document{-}

In [None]:
# Suggested citation: codechecker name, year of the check, certificate id and
# a link to the report (displayed without its URL scheme).
checker_name = codecheck_conf['codechecker']['name']
check_year = datetime.fromisoformat(codecheck_conf['check_time']).year
certificate_id = codecheck_conf['certificate']
report_url = codecheck_conf['report']
citation = (
    f"{checker_name} ({check_year}). CODECHECK Certificate {certificate_id}. "
    f"Zenodo. [{report_url.split('://')[1]}]({report_url})"
)
Markdown(citation)

## About CODECHECK{-}
This certificate confirms that the codechecker could independently reproduce the results of a computational analysis given the data and code from a third party. A CODECHECK does not check whether the original computational analysis is correct. However, as all materials required for the reproduction are freely available by following the links in this document, the reader can then study for themselves the code and data.

## About this document{-}
This document was created using a [jupyter notebook](https://jupyter.org/) and converted into PDF via [nbconvert](https://nbconvert.readthedocs.io/), [pandoc](https://pandoc.org/), and [xelatex](http://xetex.sourceforge.net/). The command `make codecheck.pdf` will regenerate the report file.

## License{-}
The code, data, and figures created by the original authors are licensed under the MIT license (see their [LICENSE file](https://github.com/codecheckers/causality-review/blob/main/LICENSE)). The content of the `codecheck` directory and this report are licensed under the [CC BY 4.0 license](https://creativecommons.org/licenses/by/4.0/).

## Package versions{-}

In [None]:
# Print the stored package list verbatim (file name suggests `conda list` output — TODO confirm).
# NOTE(review): `%cat` is an IPython shell alias that is only defined on POSIX systems.
%cat outputs/conda_list.txt

## Manifest files{-}

### CSV files{-}

In [None]:
# For every CSV file in the manifest, emit a heading, the author's comment
# (if any) and a table of per-column summary statistics.
# Number formats for the statistics table, one per displayed column:
# the row label and the count without decimals, all other statistics with four.
stats_floatfmt = ('.0f',) * 2 + ('.4f',) * 7

full_markdown = []
for entry in codecheck_conf['manifest']:
    fname = entry['file']
    if fname.endswith('.csv'):
        comment = entry.get('comment', None)
        # the files are read without a header row and without an index column
        df = pd.read_csv(op.join('outputs', fname), index_col=False, header=None)
        if comment:
            comment_line = 'Author comment: *' + comment + '*'
        else:
            comment_line = ' '
        stats_table = df.describe().transpose().to_markdown(tablefmt="grid",
                                                            floatfmt=stats_floatfmt)
        section = [
            f'### `{fname}` {{-}}',
            comment_line,
            '',
            '**Column summary statistics:**',
            '',
            stats_table,
            '',
        ]
        full_markdown.append('\n'.join(section))

Markdown('\n\n'.join(full_markdown))

### LaTeX tables{-}

In [None]:
# LaTeX tables (only correctly displayed in LaTeX output/PDF)
# Each `.txt` manifest entry holds the body of a LaTeX table; it is wrapped in
# a `tabular` environment and pulled in with \input. The column specification
# of each table is hardcoded here, keyed by manifest file name.
columns = {'figures/computational-times.txt': '{lrrrrrrrr}',
           'figures/ul-transforms.txt': '{lllrrrrrrrrrrrr}'}
full_text = []
for entry in codecheck_conf['manifest']:
    fname = entry['file']
    if fname.endswith('.txt'):
        # every table file needs a known column specification
        assert fname in columns
        # underscores must be escaped when the name is typeset, but not in the \input path
        typeset_name = fname.replace('_', r'\_')
        full_text += [
            r'\texttt{' + typeset_name + r'}\\',
            r'Author comment: \emph{' + entry.get('comment', '') + r'}\\',
            '',
            r'\begin{tiny}\begin{tabular}' + columns[fname],
            r'\input{outputs/' + fname + r'}',
            r'\end{tabular}\end{tiny}',
            '',
            '',
        ]
Latex('\n'.join(full_text))

### Figures{-}

In [None]:
# Figures (only correctly displayed in LaTeX output/PDF)
# Only the PDF version of each figure is included. The manifest comments are
# stored for the EPS versions, so they are looked up via the matching EPS name.
eps_comments = {entry['file']: entry.get('comment', '')
                for entry in codecheck_conf['manifest']
                if entry['file'].endswith('.eps')}
full_text = []
for entry in codecheck_conf['manifest']:
    fname = entry['file']
    if not fname.endswith('.pdf'):
        continue
    eps_name = fname[:-len('.pdf')] + '.eps'
    # A PDF without an EPS sibling used to raise KeyError; fall back to the
    # PDF entry's own comment (or an empty one) instead.
    comment = eps_comments.get(eps_name, entry.get('comment', ''))
    # underscores must be escaped when the name is typeset, but not in the file path
    full_text.extend([r'\texttt{' + fname.replace('_', r'\_') + r'}.\\',
                      r'Author comment: \emph{' + comment + r'}\\',
                      r'\includegraphics{outputs/' + fname + r'}',
                      '', ''])
Latex('\n'.join(full_text))