In [21]:
# Imports for analysis
from smart_open import open as smart_open
import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt

pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 150)

## Parse results log file


In [2]:
LOG_FILE_PATH = "results/CartPole-v1/train-a2c/20210405-225734/all_results.txt"

RESULTS = []
SEPARATOR = "=="

with smart_open(LOG_FILE_PATH, "r", encoding="utf-8", errors="ignore") as f:
    contents = f.readlines()

    # Strip unnecessary/empty new lines
    contents = [line for line in contents if line != "\n"]
    contents = [line for line in contents if not line.startswith(SEPARATOR)]

    enumerated_contents = list(enumerate(contents, 1))

    for _ in range(0, len(enumerated_contents)-1, 4):
        result = {}
        result["datetime"] = enumerated_contents[_][1].split(":")[1]
        result["hyperparameters"] = enumerated_contents[_ + 1][1].split(":", 1)[1]
        result["results"] = enumerated_contents[_ + 2][1].split(":", 1)[1]
        result["wall_time"] = enumerated_contents[_ + 3][1].split(":")[1]

        RESULTS.append(result)        

len(RESULTS)


54

In [100]:
results_df = pd.DataFrame.from_dict(RESULTS)
results_df.head(5)

Unnamed: 0,datetime,hyperparameters,results,wall_time
0,20210405-225734\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:21.3, max_reward:139.2, reward_vari...",120.04074515500179\n
1,20210405-225734\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:15.1, max_reward:184.4, reward_vari...",126.65153170299891\n
2,20210405-225734\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:15.4, max_reward:202.4, reward_vari...",129.78630696300024\n
3,20210405-225734\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:18.5, max_reward:205.1, reward_vari...",134.3010969499992\n
4,20210405-225734\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:17.0, max_reward:178.5, reward_vari...",130.13781459500024\n


In [101]:
# Strip new lines
results_df["datetime"] = results_df["datetime"].str.strip()
results_df["hyperparameters"] = results_df["hyperparameters"].str.strip()
results_df["results"] = results_df["results"].str.strip()
results_df["results"] = results_df["results"].str.replace("::", ":-") # This is to fix a bad replace bug during logging - negative numbers must be preserved
results_df["wall_time"] = results_df["wall_time"].str.strip()
results_df.head(3)

Unnamed: 0,datetime,hyperparameters,results,wall_time
0,20210405-225734,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:21.3, max_reward:139.2, reward_vari...",120.0407451550018
1,20210405-225734,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:15.1, max_reward:184.4, reward_vari...",126.65153170299892
2,20210405-225734,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:15.4, max_reward:202.4, reward_vari...",129.78630696300024


In [102]:
# Split items in hyperparameters and results columns into lists
results_df["hyperparameters"] = results_df["hyperparameters"].str.split(",", 3)
results_df["results"] = results_df["results"].str.split(",")
results_df.head(3)

Unnamed: 0,datetime,hyperparameters,results,wall_time
0,20210405-225734,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:21.3, max_reward:139.2, reward_v...",120.0407451550018
1,20210405-225734,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:15.1, max_reward:184.4, reward_v...",126.65153170299892
2,20210405-225734,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:15.4, max_reward:202.4, reward_v...",129.78630696300024


In [103]:
# Make reliable index column
results_df.reset_index(level=0, inplace=True)

In [126]:
flattened_hyperparameters_df = pd.DataFrame(results_df["hyperparameters"].to_list(), columns=["num_env", "num_episodes", "learning_rate", "hidden_layers"], index=results_df["index"])
flattened_results_df = pd.DataFrame(results_df["results"].to_list(), columns=["min_reward", "max_reward", "reward_variance", "mean_reward", "mean_actor_loss", "mean_critic_loss", "mean_entropy_loss", "mean_overall_loss"], index=results_df["index"])


In [127]:
flattened_hyperparameters_df.reset_index(level=0, inplace=True)
flattened_results_df.reset_index(level=0, inplace=True)

In [128]:
preprocessed_results_df = results_df.join(flattened_hyperparameters_df, on="index", lsuffix="_p")
preprocessed_results_df = results_df.join(flattened_results_df, on="index", lsuffix="_r")
preprocessed_results_df.head(3)


Unnamed: 0,index_r,datetime,hyperparameters,results,wall_time,index,min_reward,max_reward,reward_variance,mean_reward,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,0,20210405-225734,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:21.3, max_reward:139.2, reward_v...",120.0407451550018,0,min_reward:21.3,max_reward:139.2,reward_variance:26.496033589954553,mean_reward:77.29800000000002,mean_actor_loss:-0.11034903979808929,mean_critic_loss:2.3578196430739373,mean_entropy_loss:-3.2299348449707033,mean_overall_loss:2.2442579316822346
1,1,20210405-225734,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:15.1, max_reward:184.4, reward_v...",126.65153170299892,1,min_reward:15.1,max_reward:184.4,reward_variance:37.07680115651834,mean_reward:90.30400000000002,mean_actor_loss:-0.09831048473428179,mean_critic_loss:1.9740386339361984,mean_entropy_loss:-3.166618962287903,mean_overall_loss:1.8725499991586898
2,2,20210405-225734,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:15.4, max_reward:202.4, reward_v...",129.78630696300024,2,min_reward:15.4,max_reward:202.4,reward_variance:35.883354135309034,mean_reward:99.63600000000001,mean_actor_loss:-0.10236293252928909,mean_critic_loss:1.819860874202171,mean_entropy_loss:-3.1372220420837404,mean_overall_loss:1.7143505113069666


In [129]:
preprocessed_results_df = preprocessed_results_df.drop(columns=["index_r", "hyperparameters", "results"])
preprocessed_results_df.head(2)

Unnamed: 0,datetime,wall_time,index,min_reward,max_reward,reward_variance,mean_reward,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,20210405-225734,120.0407451550018,0,min_reward:21.3,max_reward:139.2,reward_variance:26.496033589954553,mean_reward:77.29800000000002,mean_actor_loss:-0.11034903979808929,mean_critic_loss:2.3578196430739373,mean_entropy_loss:-3.2299348449707033,mean_overall_loss:2.2442579316822346
1,20210405-225734,126.65153170299892,1,min_reward:15.1,max_reward:184.4,reward_variance:37.07680115651834,mean_reward:90.30400000000002,mean_actor_loss:-0.09831048473428179,mean_critic_loss:1.9740386339361984,mean_entropy_loss:-3.166618962287903,mean_overall_loss:1.8725499991586898


In [130]:
_columns = ["index", "datetime", "wall_time", "min_reward", "max_reward", "mean_reward", "reward_variance", "mean_actor_loss", "mean_critic_loss", "mean_entropy_loss", "mean_overall_loss"]

preprocessed_results_df = preprocessed_results_df[_columns]
preprocessed_results_df.head(1)

Unnamed: 0,index,datetime,wall_time,min_reward,max_reward,mean_reward,reward_variance,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,0,20210405-225734,120.0407451550018,min_reward:21.3,max_reward:139.2,mean_reward:77.29800000000002,reward_variance:26.496033589954553,mean_actor_loss:-0.11034903979808929,mean_critic_loss:2.3578196430739373,mean_entropy_loss:-3.2299348449707033,mean_overall_loss:2.2442579316822346


In [131]:
preprocessed_results_df.shape

(54, 11)

In [132]:
# Remove text from each column
preprocessed_results_df["min_reward"] = preprocessed_results_df["min_reward"].str.split(":").str[1]
preprocessed_results_df["max_reward"] = preprocessed_results_df["max_reward"].str.split(":").str[1]
preprocessed_results_df["mean_reward"] = preprocessed_results_df["mean_reward"].str.split(":").str[1]
preprocessed_results_df["reward_variance"] = preprocessed_results_df["reward_variance"].str.split(":").str[1]
preprocessed_results_df["mean_actor_loss"] = preprocessed_results_df["mean_actor_loss"].str.split(":").str[1]
preprocessed_results_df["mean_critic_loss"] = preprocessed_results_df["mean_critic_loss"].str.split(":").str[1]
preprocessed_results_df["mean_entropy_loss"] = preprocessed_results_df["mean_entropy_loss"].str.split(":").str[1]
preprocessed_results_df["mean_overall_loss"] = preprocessed_results_df["mean_overall_loss"].str.split(":").str[1]
preprocessed_results_df.head(3)

Unnamed: 0,index,datetime,wall_time,min_reward,max_reward,mean_reward,reward_variance,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,0,20210405-225734,120.0407451550018,21.3,139.2,77.29800000000002,26.496033589954557,-0.1103490397980892,2.3578196430739373,-3.2299348449707037,2.244257931682234
1,1,20210405-225734,126.65153170299892,15.1,184.4,90.30400000000002,37.07680115651834,-0.0983104847342817,1.9740386339361984,-3.166618962287903,1.8725499991586896
2,2,20210405-225734,129.78630696300024,15.4,202.4,99.636,35.883354135309034,-0.102362932529289,1.819860874202171,-3.1372220420837404,1.7143505113069666


In [133]:
# Save results to CSV
preprocessed_results_df.to_csv("training_results_20210405-225734.csv", index=False)

## Analyse training results