In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf
import glob

In [2]:
##########################################
# source:
# https://github.com/theRealSuperMario/supermariopy/blob/master/scripts/tflogs2pandas.py
#########################################
import tensorflow as tf
import glob
import os
import pandas as pd
import traceback
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import click
import pprint


# Extraction function
def tflog2pandas(path: str, metrics=("rollout/return",)) -> pd.DataFrame:
    """Convert a single TensorBoard event file into a long-format DataFrame.

    Parameters
    ----------
    path : str
        Path to the tensorflow event file.
    metrics : iterable of str, str or None, optional
        Scalar tags to extract. Defaults to ``("rollout/return",)``, which is
        the only tag the original implementation kept. A single string is
        treated as one tag. Pass ``None`` to extract every scalar tag.

    Returns
    -------
    pd.DataFrame
        Frame with the columns ``metric``, ``value`` and ``step``, one row per
        scalar event. If the file cannot be read, the error is printed and
        whatever was extracted before the failure (possibly nothing) is
        returned.
    """
    size_guidance = {
        "compressedHistograms": 1,
        "images": 1,
        "scalars": 0,  # 0 means load all
        "histograms": 1,
    }
    # A bare string is iterable character by character; treat it as one tag.
    if isinstance(metrics, str):
        metrics = (metrics,)
    wanted = None if metrics is None else set(metrics)

    runlog_data = pd.DataFrame({"metric": [], "value": [], "step": []})
    frames = []
    try:
        event_acc = EventAccumulator(path, size_guidance)
        event_acc.Reload()
        for tag in event_acc.Tags()["scalars"]:
            if wanted is not None and tag not in wanted:
                continue
            event_list = event_acc.Scalars(tag)
            frames.append(
                pd.DataFrame(
                    {
                        "metric": [tag] * len(event_list),
                        "value": [event.value for event in event_list],
                        "step": [event.step for event in event_list],
                    }
                )
            )
    # Broad catch for unreadable/corrupt event files (e.g. DataLossError).
    # `Exception` rather than a bare `except` so that KeyboardInterrupt and
    # SystemExit still propagate and a long batch run can be aborted.
    except Exception:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()
    if frames:
        # Single concat after the loop; the empty seed frame is kept first so
        # column order and dtype promotion match the original implementation.
        runlog_data = pd.concat([runlog_data] + frames)
    return runlog_data


def many_logs2pandas(event_paths):
    """Convert several event files and stack them into one DataFrame.

    Parameters
    ----------
    event_paths : iterable of str
        Paths of the event files; each one is passed to ``tflog2pandas``.

    Returns
    -------
    pd.DataFrame
        The per-file frames stacked in input order. As long as nothing has
        been accumulated yet, the next frame simply replaces the accumulator
        (keeping its own index); every further frame is concatenated with a
        fresh 0..n-1 index. An empty DataFrame is returned for no paths.
    """
    all_logs = pd.DataFrame()
    for path in event_paths:
        log = tflog2pandas(path)
        if log is None:
            continue
        if all_logs.shape[0] == 0:
            all_logs = log
        else:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent and behaves the same on older versions.
            all_logs = pd.concat([all_logs, log], ignore_index=True)
    return all_logs


def _logfile_run_id(logfile):
    """Return the last two dot-separated fields of the file name, joined."""
    # Only the file name is inspected so dots in directory names (e.g. a
    # leading "../") cannot leak into the id.
    file_name = logfile.replace("\\", "/").split("/")[-1]
    return "".join(file_name.split(".")[-2:])


def _logfile_mode(logfile):
    """Return the path component used as the "mode" label in output names."""
    tail = logfile.split("/")[-1].split("\\")
    if len(tail) > 1:
        # Mixed-separator path as produced by a recursive glob on Windows:
        # everything after the last "/" is "<folder>, <mode>, ..." joined by
        # backslashes. This is the case the original code handled.
        return tail[1]
    # Separator-independent fallback (the original raised IndexError here).
    # NOTE(review): assumes the layout <mode>/<run>/summary/tb/<event file>
    # seen in this notebook's listing -- confirm for other log trees.
    parts = [part for part in logfile.replace("\\", "/").split("/") if part]
    if len(parts) >= 5:
        return parts[-5]
    return "unknown"


def extract_tf_to_csv(logfile: str, write_pkl: bool, write_csv: bool, out_dir: str):
    """Extract the scalars of one tensorflow event file and store them on disk.

    This is an enhanced version of
    https://gist.github.com/ptschandl/ef67bbaa93ec67aba2cab0a7af47700b

    The event file is converted to a pandas DataFrame via ``many_logs2pandas``
    and written to ``out_dir`` as a csv file and/or a pickle file.

    Parameters
    ----------
    logfile : str
        Path to a single event file. Directories are not accepted.
    write_pkl : bool
        If true, write ``all_training_logs_in_one_file.pkl`` into ``out_dir``.
    write_csv : bool
        If true, write ``run_<mode>_<run_id>_file.csv`` into ``out_dir``, where
        ``run_id`` is built from the last two dot-separated fields of the file
        name and ``mode`` is a directory name taken from the path.
    out_dir : str
        Output directory; created if it does not exist.

    Raises
    ------
    ValueError
        If ``logfile`` is not an existing file.

    Example
    -------
    extract_tf_to_csv("tflog.hostname.12345", write_pkl=False, write_csv=True,
                      out_dir="converted")
    """
    if not os.path.isfile(logfile):
        raise ValueError(
            "input argument {} has to be a file".format(
                logfile
            )
        )
    event_paths = [logfile]

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint("Found tensorflow logs to process:")
    pp.pprint(event_paths)
    all_logs = many_logs2pandas(event_paths)
    pp.pprint("Head of created dataframe")
    pp.pprint(all_logs.head())

    os.makedirs(out_dir, exist_ok=True)
    run_id = _logfile_run_id(logfile)
    mode = _logfile_mode(logfile)
    if write_csv:
        print("saving to csv file")
        out_file = os.path.join(out_dir, "run_"+mode+"_"+run_id+"_file.csv")
        print(out_file)
        all_logs.to_csv(out_file, index=False)
    if write_pkl:
        print("saving to pickle file")
        # NOTE(review): the pickle name is fixed, so successive calls with the
        # same out_dir overwrite each other -- unlike the per-run csv name.
        out_file = os.path.join(out_dir, "all_training_logs_in_one_file.pkl")
        print(out_file)
        all_logs.to_pickle(out_file)

In [3]:
# Sub-folders of the debug results directory to scan for event files.
folders = ["point125", "point25", "point5", "point75"]

# Maps each folder name to the sorted list of event files found beneath it.
event_files = {}
for folder in folders:
    print(folder + ":")
    pattern = "../SHARCNET/Results/debug/" + folder + "/**/events*"
    # "**" with recursive=True descends into every nested run directory.
    event_files[folder] = sorted(glob.glob(pattern, recursive=True))
    for event_path in event_files[folder]:
        print(event_path)

point125:
../SHARCNET/Results/debug/point125\PLA\2019-09-04-233003-0\summary\tb\events.out.tfevents.1567654217.gra702
../SHARCNET/Results/debug/point125\PLA\2019-09-04-233004-0\summary\tb\events.out.tfevents.1567654218.gra29
../SHARCNET/Results/debug/point125\Random\2019-09-04-233003-0\summary\tb\events.out.tfevents.1567654213.gra632
../SHARCNET/Results/debug/point125\Random\2019-09-04-233006-0\summary\tb\events.out.tfevents.1567654215.gra635
../SHARCNET/Results/debug/point125\SARA\2019-09-04-233004-0\summary\tb\events.out.tfevents.1567654217.gra120
../SHARCNET/Results/debug/point125\SARA\2019-09-04-233006-0\summary\tb\events.out.tfevents.1567654221.gra97
point25:
../SHARCNET/Results/debug/point25\PLA\2019-09-04-233007-0\summary\tb\events.out.tfevents.1567654220.gra625
../SHARCNET/Results/debug/point25\PLA\2019-09-04-233020-0\summary\tb\events.out.tfevents.1567654236.gra636
../SHARCNET/Results/debug/point25\Random\2019-09-04-235330-0\summary\tb\events.out.tfevents.1567655620.gra741
../

In [4]:
# Convert every discovered event file to csv; the output of each source
# folder goes into a sub-directory of the same name under out_root_dir.
out_root_dir = "../SHARCNET/Results/Tensorboard_to_CSV/debug/"
for folder in event_files:
    out_dir = "{}{}".format(out_root_dir, folder)
    for f in event_files[folder]:
        extract_tf_to_csv(f, write_pkl=False, write_csv=True, out_dir=out_dir)
        

'Found tensorflow logs to process:'
[   '../SHARCNET/Results/debug/point125\\PLA\\2019-09-04-233003-0\\summary\\tb\\events.out.tfevents.1567654217.gra702']
'Head of created dataframe'
           metric     value  step
0  rollout/return  0.634566   1.0
1  rollout/return  5.091974   2.0
2  rollout/return  4.316052   3.0
3  rollout/return  2.663814   4.0
4  rollout/return  3.831974   5.0
saving to csv file
../SHARCNET/Results/Tensorboard_to_CSV/debug/point125\run_PLA_1567654217gra702_file.csv
'Found tensorflow logs to process:'
[   '../SHARCNET/Results/debug/point125\\PLA\\2019-09-04-233004-0\\summary\\tb\\events.out.tfevents.1567654218.gra29']
'Head of created dataframe'
           metric     value  step
0  rollout/return  0.227776   1.0
1  rollout/return  0.906292   2.0
2  rollout/return  4.140535   3.0
3  rollout/return  3.080291   4.0
4  rollout/return  0.252620   5.0
saving to csv file
../SHARCNET/Results/Tensorboard_to_CSV/debug/point125\run_PLA_1567654218gra29_file.csv
'Found tenso

'Head of created dataframe'
           metric      value  step
0  rollout/return   9.253086   1.0
1  rollout/return  24.408449   2.0
2  rollout/return  18.162600   3.0
3  rollout/return  12.470254   4.0
4  rollout/return  16.249615   5.0
saving to csv file
../SHARCNET/Results/Tensorboard_to_CSV/debug/point5\run_SARA_1567655628gra745_file.csv
'Found tensorflow logs to process:'
[   '../SHARCNET/Results/debug/point5\\SARA\\2019-09-06-030101-0\\summary\\tb\\events.out.tfevents.1567753274.gra104']
'Head of created dataframe'
           metric      value  step
0  rollout/return   6.371902   1.0
1  rollout/return   9.382892   2.0
2  rollout/return  21.512251   3.0
3  rollout/return   6.265267   4.0
4  rollout/return  11.035533   5.0
saving to csv file
../SHARCNET/Results/Tensorboard_to_CSV/debug/point5\run_SARA_1567753274gra104_file.csv
'Found tensorflow logs to process:'
[   '../SHARCNET/Results/debug/point5\\SARA\\2019-09-06-030101-0\\summary\\tb\\events.out.tfevents.1567753274.gra643']
'H