In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf
import glob

In [2]:
##########################################
# source:
# https://github.com/theRealSuperMario/supermariopy/blob/master/scripts/tflogs2pandas.py
#########################################
import tensorflow as tf
import glob
import os
import pandas as pd
import traceback
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import click
import pprint


# Extraction function
def tflog2pandas(path: str, scalar_tags=("rollout/return", "rollout/Q_mean")) -> pd.DataFrame:
    """Convert the selected scalar series of one TensorBoard event file to a DataFrame.

    Parameters
    ----------
    path : str
        path to tensorflow log file
    scalar_tags : iterable of str, str or None, optional
        Scalar tags to extract. Defaults to the two rollout tags this notebook
        analyses. Pass ``None`` to keep every scalar tag found in the file.

    Returns
    -------
    pd.DataFrame
        One column per extracted tag, indexed by the logged step. Empty (or
        partially filled) if the event file could not be read completely.
    """
    DEFAULT_SIZE_GUIDANCE = {
        "compressedHistograms": 1,
        "images": 1,
        "scalars": 0,  # 0 means load all
        "histograms": 1,
    }
    # A lone string would otherwise be split into characters by set().
    if isinstance(scalar_tags, str):
        scalar_tags = (scalar_tags,)
    wanted_tags = None if scalar_tags is None else set(scalar_tags)

    runlog_data = pd.DataFrame()
    try:
        event_acc = EventAccumulator(path, DEFAULT_SIZE_GUIDANCE)
        event_acc.Reload()
        for tag in event_acc.Tags()["scalars"]:
            if wanted_tags is not None and tag not in wanted_tags:
                continue
            event_list = event_acc.Scalars(tag)
            values = [event.value for event in event_list]
            steps = [event.step for event in event_list]
            series = pd.Series(data=values, index=steps, name=tag)
            # Concatenating inside the try keeps whatever was extracted so far
            # if a later tag turns out to be unreadable.
            runlog_data = pd.concat([runlog_data, series], axis=1)
    # Best-effort catch of read errors (e.g. DataLossError). `Exception` rather
    # than a bare `except` so Ctrl-C / SystemExit still stop a long batch run.
    except Exception:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()
    return runlog_data


def many_logs2pandas(event_paths):
    """Convert several event files and stack their rows into one DataFrame.

    Parameters
    ----------
    event_paths : iterable of str
        Paths handed to ``tflog2pandas`` one by one.

    Returns
    -------
    pd.DataFrame
        A single converted log is returned unchanged (its index is kept).
        Several logs are stacked row-wise with a fresh 0..n-1 index. If no log
        yielded any rows, an empty DataFrame is returned.
    """
    frames = []
    last_empty = pd.DataFrame()
    for path in event_paths:
        log = tflog2pandas(path)
        if log is None:
            continue
        if not frames and log.shape[0] == 0:
            # Empty logs ahead of the first real one contribute nothing.
            last_empty = log
            continue
        frames.append(log)

    if not frames:
        return last_empty
    if len(frames) == 1:
        return frames[0]
    # One concat replaces repeated DataFrame.append calls: append was removed
    # in pandas 2.0 and re-copied the accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)


def _mode_from_logfile(logfile: str) -> str:
    """Return the mode directory name (e.g. "PLA") encoded in an event-file path."""
    _, slash, tail = logfile.rpartition("/")
    tail_parts = tail.split("\\")
    if slash and len(tail_parts) > 1:
        # Mixed separators: a forward-slash glob root followed by a backslash
        # remainder. The mode is the component right after the glob root.
        return tail_parts[1]
    # Uniform separators: rely on the directory layout seen in this notebook's
    # glob output, <mode>/<run>/summary/tb/<event file>.
    parts = [part for part in logfile.replace("\\", "/").split("/") if part]
    return parts[-5] if len(parts) >= 5 else "unknown"


def extract_tf_to_csv(logfile: str, write_pkl: bool, write_csv: bool, out_dir: str):
    """Convert one TensorBoard event file and store the result in ``out_dir``.

    This is an enhanced version of
    https://gist.github.com/ptschandl/ef67bbaa93ec67aba2cab0a7af47700b

    Parameters
    ----------
    logfile : str
        Path to a single event file ("events*").
    write_pkl : bool
        Also write the frame to ``all_training_logs_in_one_file.pkl``.
    write_csv : bool
        Write the frame to ``run_<mode>_<run_id>_file.csv``.
    out_dir : str
        Output directory; created if it does not exist.

    Raises
    ------
    ValueError
        If ``logfile`` is not an existing file.

    Example usage:
    # create a csv file from one event file and write it to folder "./converted"
    extract_tf_to_csv("tflog.hostname.12345", write_pkl=False, write_csv=True, out_dir="converted")
    """
    if not os.path.isfile(logfile):
        raise ValueError(
            "input argument {} has to be a file".format(
                logfile
            )
        )
    event_paths = [logfile]

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint("Found tensorflow logs to process:")
    pp.pprint(event_paths)
    all_logs = many_logs2pandas(event_paths)
    pp.pprint("Head of created dataframe")
    pp.pprint(all_logs.head())

    os.makedirs(out_dir, exist_ok=True)
    # The run id is the last two dot-separated pieces of the file name, e.g.
    # "events.out.tfevents.1569269985.gra96" -> "1569269985gra96". Splitting the
    # base name keeps directory fragments out of the id.
    file_name = logfile.replace("\\", "/").split("/")[-1]
    run_id = "".join(file_name.split(".")[-2:])
    mode = _mode_from_logfile(logfile)
    if write_csv:
        print("saving to csv file")
        out_file = os.path.join(out_dir, "run_" + mode + "_" + run_id + "_file.csv")
        print(out_file)
        # The step index is intentionally not written (same as the original index=None).
        all_logs.to_csv(out_file, index=False)
    if write_pkl:
        print("saving to pickle file")
        # NOTE(review): the pickle name is fixed, so every call with the same
        # out_dir overwrites the previous pickle; confirm that is intended.
        out_file = os.path.join(out_dir, "all_training_logs_in_one_file.pkl")
        print(out_file)
        all_logs.to_pickle(out_file)

In [3]:
# Collect, per experiment folder, every TensorBoard event file found anywhere
# below it, in sorted order, and echo the paths for a quick visual check.
folders = ["point125", "point25", "point5", "point75"]
event_files = {}
for folder in folders:
    print("{}:".format(folder))
    event_files[folder] = sorted(
        glob.glob(
            "../SHARCNET/Results/debug/ddpg/300_300_NN/" + folder + "/**/events*",
            recursive=True,
        )
    )
    for f in event_files[folder]:
        print(f)

point125:
../SHARCNET/Results/debug/ddpg/300_300_NN/point125\PLA\2019-09-23-161920-0-0\summary\tb\events.out.tfevents.1569269985.gra96
../SHARCNET/Results/debug/ddpg/300_300_NN/point125\PLA\2019-09-23-161920-1-0\summary\tb\events.out.tfevents.1569269984.gra96
../SHARCNET/Results/debug/ddpg/300_300_NN/point125\SARA\2019-09-23-160800-0-0\summary\tb\events.out.tfevents.1569269307.gra139
../SHARCNET/Results/debug/ddpg/300_300_NN/point125\SARA\2019-09-23-160800-1-0\summary\tb\events.out.tfevents.1569269306.gra139
point25:
../SHARCNET/Results/debug/ddpg/300_300_NN/point25\PLA\2019-09-23-161336-1-0\summary\tb\events.out.tfevents.1569269641.gra641
../SHARCNET/Results/debug/ddpg/300_300_NN/point25\SARA\2019-09-23-161334-0-0\summary\tb\events.out.tfevents.1569269640.gra648
../SHARCNET/Results/debug/ddpg/300_300_NN/point25\SARA\2019-09-23-161334-1-0\summary\tb\events.out.tfevents.1569269639.gra649
point5:
../SHARCNET/Results/debug/ddpg/300_300_NN/point5\PLA\2019-09-23-161333-0-1\summary\tb\events

In [5]:
# Convert every discovered event file to CSV, mirroring the experiment folder
# name underneath the output root.
out_root_dir = "../SHARCNET/Results/Tensorboard_to_CSV/debug/ddpg/300_300_NN/"
for folder, files in event_files.items():
    out_dir = "{}{}".format(out_root_dir, folder)
    for f in files:
        extract_tf_to_csv(
            logfile=f,
            write_pkl=False,
            write_csv=True,
            out_dir=out_dir,
        )
        

'Found tensorflow logs to process:'
[   '../SHARCNET/Results/debug/ddpg/300_300_NN/point125\\PLA\\2019-09-23-161920-0-0\\summary\\tb\\events.out.tfevents.1569269985.gra96']
'Head of created dataframe'
   rollout/Q_mean  rollout/return
1       -0.003554        0.706008
2        0.023077        1.149397
3        0.027561        1.672490
4        0.016354        0.179354
5        0.018474        1.830795
saving to csv file
../SHARCNET/Results/Tensorboard_to_CSV/debug/ddpg/300_300_NN/point125\run_PLA_1569269985gra96_file.csv
'Found tensorflow logs to process:'
[   '../SHARCNET/Results/debug/ddpg/300_300_NN/point125\\PLA\\2019-09-23-161920-1-0\\summary\\tb\\events.out.tfevents.1569269984.gra96']
'Head of created dataframe'
   rollout/Q_mean  rollout/return
1        0.014396        4.039534
2        0.080840        2.612822
3        0.114473        4.666185
4        0.133131        2.977127
5        0.169071        7.828629
saving to csv file
../SHARCNET/Results/Tensorboard_to_CSV/debug/ddpg