# Compute the running time per experiment

## Global imports and variables

In [1]:
# Import for interactive notebook (see:
# https://ipywidgets.readthedocs.io/en/stable/examples/Using%20Interact.html)
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import Layout


# Import to list files in directories
import glob

# Import for regular expressions
import re

# Imports for path operations
import os
import os.path

# For date operations
from datetime import datetime

import pandas as pd
# Never truncate cell contents when rendering data frames. ``None`` is the
# documented "unlimited" value; the former ``-1`` is deprecated since pandas 1.0
# and rejected by recent versions.
pd.set_option('display.max_colwidth', None)
import matplotlib.pyplot as plt

import numpy as np
import configparser

# import jtplot module in notebook
from jupyterthemes import jtplot

# choose which theme to inherit plotting style from
# onedork | grade3 | oceans16 | chesterish | monokai | solarizedl | solarizedd
jtplot.style(theme='onedork')

import datetime

In [2]:
# Root directory holding one sub-directory per experiment run. It can be
# overridden with the THESIS_RESULTS_DIR environment variable so the notebook
# is usable on machines other than the original author's.
RESULTS_DIR = os.environ.get(
    "THESIS_RESULTS_DIR", "/Users/gomerudo/workspace/thesis_results"
)

def rettext(text):
    """Identity callback: hand back ``text`` exactly as received.

    Used as the function wrapped by the ``interactive`` widget, so that the
    widget's ``result`` is simply the currently selected value.
    """
    selected = text
    return selected

def search_in_file(file, pattern):
    """Collect the capture groups of every regex match found in a text file.

    The file is scanned line by line, so a match never spans several lines.

    Parameters
    ----------
    file : str
        Path of the text file to scan.
    pattern : str or compiled pattern
        Regular expression to search for.

    Returns
    -------
    list of tuple
        One ``match.groups()`` tuple per match, in order of appearance.
    """
    regex = re.compile(pattern)
    results = []
    # Context manager guarantees the handle is closed even if matching fails.
    with open(file) as handle:
        for line in handle:
            results.extend(match.groups() for match in regex.finditer(line))
    return results

# Layout object handed to the selector widget: half of the available width.
form_item_layout = Layout(width="50%")

# Every entry directly under RESULTS_DIR, sorted by name, is offered as a choice.
# (Narrower alternative pattern kept for reference: "{dir}/[mix-]?[0-9]*")
_available_results = sorted(glob.glob("{dir}/*".format(dir=RESULTS_DIR)))

# Selector widget; its ``result`` is whatever ``rettext`` returns for the
# current choice.
w_resdirs = interactive(
    rettext,
    text=_available_results,
    layout=form_item_layout,
)

## Selecting the desired results

In [4]:
# Render the results-directory selector; the selected value is read in a later
# cell through ``w_resdirs.result``.
display(w_resdirs)

interactive(children=(Dropdown(description='text', options=('/Users/gomerudo/workspace/thesis_results/27969', …

## Results

In [15]:
################################################################################
############ OBTAIN THE FILES AND DIRECTORIES TO QUERY FOR ANALYSIS ############
################################################################################

def _first_match(pattern, skip_zip=False):
    """Return the first path (sorted by name) matching the glob ``pattern``.

    With ``skip_zip=True``, paths ending in ``.zip`` are ignored. Raises
    ``IndexError`` naming the pattern when nothing matches.
    """
    matches = sorted(glob.glob(pattern))
    if skip_zip:
        matches = [path for path in matches if not path.endswith(".zip")]
    if not matches:
        raise IndexError("No path matches {!r}".format(pattern))
    return matches[0]


# Obtain the chosen directory
chosen_dir = w_resdirs.result

# Experiments dir. The ".zip" archives are filtered explicitly: a glob suffix
# such as "[!.zip]" is a character class and would also drop any name that
# merely ends in '.', 'z', 'i' or 'p'.
exp_dir = _first_match("{dir}/experiment*".format(dir=chosen_dir), skip_zip=True)

# This is a list of all openai dirs, sorted by name (hence, by timestamp)
openai_dirs = sorted(
    path
    for path in glob.glob("{dir}/openai*".format(dir=exp_dir))
    if not path.endswith(".zip")
)

# A simple DB of experiments and actions_info.csv should be there
dbexp_file = _first_match("{dir}/db_experiments.csv".format(dir=exp_dir))
ainfo_file = _first_match("{dir}/actions_info.csv".format(dir=exp_dir))
config_file = _first_match("{dir}/config*.ini".format(dir=exp_dir))
# Full log of the run: first entry of the chosen directory matching "sl*"
flog_file = _first_match("{dir}/sl*".format(dir=chosen_dir))

# Make dataframes for the db of experiments and the actions summary
dbexp_df = pd.read_csv(dbexp_file)
ainfo_df = pd.read_csv(ainfo_file)

# Make the target directory (no error if it already exists)
summaries_dir = "{exp}/summary".format(exp=chosen_dir)
os.makedirs(summaries_dir, exist_ok=True)

In [6]:
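# ################################################################################
# ########### BUILD THE RELEVANT DATA FRAMES TO PRINT FOR MAIN SUMMARY ###########
# ################################################################################
# NOTE: this whole cell is commented out, yet the "Summary" markdown below still
# references `config`, `openai_dirs_df`, `n_exceptions` and `exceptions_set`,
# which are only assigned here. Re-enable this cell before rendering the summary.
# When re-enabling: `prev_timestamp = 0` must be uncommented as well, and
# `datetime.strptime` needs the `datetime` class, whereas the imports cell ends
# with `import datetime`, which rebinds the name to the module.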
    
# # Try to obtain the current times
# # running_times = search_in_file(flog_file, ".*\s+(.*)elapsed")
# # if len(running_times) == len(openai_dirs):
# #     f_running_times = []
# #     for time in running_times:
# #         time_cleansed = time[0].split(".")[0]
# #         f_running_times.append(time_cleansed)
# # else:
# # prev_timestamp = 0
# f_running_times = []
# for directory in openai_dirs:
#     exp_dirname_only = os.path.basename(directory)
#     timestamp = os.path.basename(exp_dirname_only.split("-")[1])
#     d2 = datetime.strptime(timestamp, "%Y%m%d%H%M%S")
#     if prev_timestamp:  # 2019 05 29 211533
#         d1 = datetime.strptime(prev_timestamp, "%Y%m%d%H%M%S")
#         f_running_times.append(str(d2 - d1))
#     prev_timestamp = timestamp
# f_running_times.append("NA")

# openai_dirs_df = pd.DataFrame(zip(openai_dirs, f_running_times), columns=["Log directory", "Runtime"])

# # 4. Search all exceptions
# exceptions_all = search_in_file(flog_file, "failed with exception of type.*<(.*)>.*Message.*:\s*(.*)")
# n_exceptions = len(exceptions_all)

# exceptions_set = set()
# for error, message in exceptions_all:
#     exceptions_set.add(error)

# config = configparser.ConfigParser()

# _ = config.read(config_file)

### Summary

- **Chosen results directory is:** {{chosen_dir}}
- **Full log is available at:** {{flog_file}}

#### Configuration

- **Log Path:** {{config['DEFAULT']['LogPath']}}
- **Environment:** {{config['bash']['Environment']}}

##### Reinforcement Learning

- **Algorithm:** {{config['bash']['Algorithm']}}
- **Policy representation:** {{config['bash']['Network']}}
- **Number of steps:** {{config['bash']['NSteps']}}
- **Total number of timesteps:** {{config['bash']['NumTimesteps']}}
- **Number of actions:** {{ainfo_df.shape[0]}}

##### NAS details

- **Config file:** {{config['nasenv.default']['ConfigFile']}}
- **Max Steps:** {{config['nasenv.default']['MaxSteps']}}
- **DB of experiments:** {{config['nasenv.default']['DbFile']}}
- **Dataset Handler:** {{config['nasenv.default']['DatasetHandler']}}
- **Action Space Type:** {{config['nasenv.default']['ActionSpaceType']}}
- **Trainer:** {{config['nasenv.default']['TrainerType']}}

##### Training details

- **Batch size:** {{config['trainer.default']['BatchSize']}}
- **Epochs:** {{config['trainer.default']['NEpochs']}}
- **Distributed:** {{config['trainer.tensorflow']['EnableDistributed']}}

##### Meta-dataset details

- **TFRecordsRootDir:** {{config['metadataset']['TFRecordsRootDir']}}
- **DatasetID:** {{config['metadataset']['DatasetID']}}

#### Individual run directories/time

{{openai_dirs_df}}

#### Errors found in log while building networks

- **Total number of exceptions:** {{n_exceptions}}

{{pd.DataFrame(exceptions_set, columns = ["Error type"])}}

In [12]:
def trial_summary(trial_log, include_repeated=True, max_rows=None):
    """Sum the ``running_time`` column of one trial log.

    Parameters
    ----------
    trial_log : str
        Path of a CSV file containing at least the columns ``composed_id``
        and ``running_time``.
    include_repeated : bool, optional
        If False, only the first row of each ``composed_id`` contributes to
        the total; later rows with an already seen id count as zero.
    max_rows : int or None, optional
        If given, only the first ``max_rows`` rows of the log are considered.

    Returns
    -------
    int
        Sum of the running times, each truncated to an integer.

    Notes
    -----
    Errors raised by ``pd.read_csv`` (missing or corrupted file) are not
    handled here and propagate to the caller.
    """
    trial_df = pd.read_csv(trial_log)
    if max_rows is not None:
        trial_df = trial_df[:max_rows]
    if not include_repeated:
        # Keep the first occurrence of every architecture id only
        trial_df = trial_df.drop_duplicates(subset='composed_id', keep='first')
    # Truncate per row (as int() would) before adding up
    return int(trial_df['running_time'].astype(int).sum())

# Obtain statistics for each trial: the total running time is computed twice,
# first counting repeated architectures (True) and then counting each
# architecture only once (False).
totals = {}
for include_repeated in (True, False):
    times = []
    for openai_dir in openai_dirs:
        episode_logs = sorted(
            glob.glob("{dir}/episode_logs/*".format(dir=openai_dir))
        )
        if not episode_logs:
            print("Could not read the episode_logs in {}".format(openai_dir))
            continue
        # Only the first log (by name) of each run directory is used
        trial_log = episode_logs[0]
        times.append(trial_summary(trial_log, include_repeated))
    totals[include_repeated] = sum(times)
    print(str(datetime.timedelta(seconds=totals[include_repeated])))

total_time = totals[True]
reduced_total_time = totals[False]


Could not read the episode_logs in /Users/gomerudo/workspace/thesis_results/32261/experiment-20190910012715/openai-20190910012715
0:00:00
Could not read the episode_logs in /Users/gomerudo/workspace/thesis_results/32261/experiment-20190910012715/openai-20190910012715
0:00:00


In [9]:
def trial_summary(trial_log, include_repeated=True, max_rows=6000):
    """Sum the ``running_time`` column over the first rows of a trial log.

    Parameters
    ----------
    trial_log : str
        Path of a CSV file containing at least the columns ``composed_id``
        and ``running_time``.
    include_repeated : bool, optional
        If False, only the first row of each ``composed_id`` contributes to
        the total; later rows with an already seen id count as zero.
    max_rows : int or None, optional
        Number of leading rows to consider (default 6000, the cut-off used
        for the random-search log). ``None`` uses the whole log.

    Returns
    -------
    int
        Sum of the running times, each truncated to an integer.

    Notes
    -----
    Errors raised by ``pd.read_csv`` (missing or corrupted file) are not
    handled here and propagate to the caller.
    """
    trial_df = pd.read_csv(trial_log)
    if max_rows is not None:
        trial_df = trial_df[:max_rows]  # Random search only
    if not include_repeated:
        # Keep the first occurrence of every architecture id only
        trial_df = trial_df.drop_duplicates(subset='composed_id', keep='first')
    # Truncate per row (as int() would) before adding up
    return int(trial_df['running_time'].astype(int).sum())

# Obtain statistics for the log stored directly under the experiment directory
# (not per openai run). The total is computed twice: counting repeated
# architectures (True) and counting each architecture only once (False).
episode_logs = sorted(glob.glob("{dir}/episode_logs/*".format(dir=exp_dir)))

totals = {}
for include_repeated in (True, False):
    times = []
    if episode_logs:
        # Only the first log (by name) is used
        trial_log = episode_logs[0]
        times.append(trial_summary(trial_log, include_repeated))
    else:
        # Report the directory that was actually searched
        print("Could not read the episode_logs in {}".format(exp_dir))
    totals[include_repeated] = sum(times)
    print(str(datetime.timedelta(seconds=totals[include_repeated])))

total_time = totals[True]
reduced_total_time = totals[False]



Could not read the episode_logs in /Users/gomerudo/workspace/thesis_results/32261/experiment-20190910012715/openai-20190910012715
0:00:00
Could not read the episode_logs in /Users/gomerudo/workspace/thesis_results/32261/experiment-20190910012715/openai-20190910012715
0:00:00


In [16]:
def trial_summary(trial_log, include_repeated=True, max_rows=None):
    """Add up the ``running_time`` values recorded in one log file.

    Parameters
    ----------
    trial_log : str
        Path of a CSV file containing at least the columns ``composed_id``
        and ``running_time``.
    include_repeated : bool, optional
        If False, a ``composed_id`` that appears several times is counted
        once, using its first row.
    max_rows : int or None, optional
        If given, restrict the computation to the first ``max_rows`` rows.

    Returns
    -------
    int
        Sum of the running times, each truncated to an integer.

    Notes
    -----
    Errors raised by ``pd.read_csv`` (missing or corrupted file) are not
    handled here and propagate to the caller.
    """
    trial_df = pd.read_csv(trial_log)
    if max_rows is not None:
        trial_df = trial_df[:max_rows]
    if not include_repeated:
        # Keep the first occurrence of every architecture id only
        trial_df = trial_df.drop_duplicates(subset='composed_id', keep='first')
    # Truncate per row (as int() would) before adding up
    return int(trial_df['running_time'].astype(int).sum())

# Obtain statistics for each trial from its play_logs: the total running time
# is computed twice, first counting repeated architectures (True) and then
# counting each architecture only once (False).
totals = {}
for include_repeated in (True, False):
    times = []
    for openai_dir in openai_dirs:
        play_logs = sorted(glob.glob("{dir}/play_logs/*".format(dir=openai_dir)))
        if not play_logs:
            # The message names the directory that was actually searched
            print("Could not read the play_logs in {}".format(openai_dir))
            continue
        # Only the first log (by name) of each run directory is used
        trial_log = play_logs[0]
        times.append(trial_summary(trial_log, include_repeated))
    totals[include_repeated] = sum(times)
    print(str(datetime.timedelta(seconds=totals[include_repeated])))

total_time = totals[True]
reduced_total_time = totals[False]



5 days, 17:07:47
2 days, 22:04:14
