# Log processing

In [11]:
from smart_open import open as smart_open
import pandas as pd

## Parse results log file

In [17]:
# Set both values before running the notebook: RESULTS_CSV is written by the
# final cell, so reusing an existing file name could overwrite an earlier export.

# Values used for a previous run, kept for reference:
# LOG_FILE_PATH = "results/CartPole-v1/train-a2c/20210405-225734/all_results.txt"
# RESULTS_CSV = "training_results_20210405-225734.csv"

# Log file to parse, and the CSV file the cleaned results are saved to
LOG_FILE_PATH = "results/CartPole-v0/train-a2c/20210407-003658/all_results.txt"
RESULTS_CSV = "training_results_20210407-003658.csv"

In [18]:
RESULTS = []
SEPARATOR = "=="
# Each logged run occupies four consecutive lines, in exactly this order.
RECORD_FIELDS = ("datetime", "hyperparameters", "results", "wall_time")

with smart_open(LOG_FILE_PATH, "r", encoding="utf-8", errors="ignore") as f:
    contents = f.readlines()

# Keep only record lines: drop blank lines and "==" separator lines.
# This runs after the `with` block so the file is closed as soon as it is read.
contents = [
    line for line in contents
    if line != "\n" and not line.startswith(SEPARATOR)
]

# A log that was cut short can end with a partial record. Parse only complete
# records (and say so) rather than failing with an IndexError part-way through.
n_records, n_leftover = divmod(len(contents), len(RECORD_FIELDS))
if n_leftover:
    print(f"Warning: ignoring {n_leftover} trailing line(s) that do not form a complete record")

for record_no in range(n_records):
    start = record_no * len(RECORD_FIELDS)
    record_lines = contents[start:start + len(RECORD_FIELDS)]
    # Split on the first ":" only, so a value that itself contains ":" is kept
    # whole. The trailing newline is left in place and stripped during cleaning.
    RESULTS.append(
        {field: line.split(":", 1)[1] for field, line in zip(RECORD_FIELDS, record_lines)}
    )

len(RESULTS)

54

In [19]:
# One row per parsed record; columns come from the record dictionaries' keys
results_df = pd.DataFrame(RESULTS)
results_df.head()

Unnamed: 0,datetime,hyperparameters,results,wall_time
0,20210407-003658\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:14.9, max_reward:119.6, reward_vari...",127.93577520200003\n
1,20210407-003658\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:16.5, max_reward:148.4, reward_vari...",125.21537914199996\n
2,20210407-003658\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:15.4, max_reward:142.1, reward_vari...",128.042971476\n
3,20210407-003658\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:18.3, max_reward:144.8, reward_vari...",126.46709818199997\n
4,20210407-003658\n,"num_env:6, num_episodes:50000, learning_rate:0...","min_reward:14.8, max_reward:171.7, reward_vari...",127.82372352799996\n


## Data cleaning

In [20]:
# Trim surrounding whitespace (including the trailing newline) from every parsed text column
for column in ("datetime", "hyperparameters", "results", "wall_time"):
    results_df[column] = results_df[column].str.strip()

# A faulty replace during logging wrote "::" where ":-" was meant, losing the
# minus sign; restore it so that negative numbers are preserved.
results_df["results"] = results_df["results"].str.replace("::", ":-")

In [21]:
# Split items in hyperparameters and results columns into lists.
# Hyperparameters are split at most 3 times so that the fourth item
# (e.g. "hidden_layers:(32, 32)"), which itself contains a comma, stays whole.
# `n` must be passed by keyword: it is keyword-only in pandas 2.x.
results_df["hyperparameters"] = results_df["hyperparameters"].str.split(",", n=3)
results_df["results"] = results_df["results"].str.split(",")

In [22]:
# Turn the row index into an explicit "index" column to use as a key in later cells
results_df = results_df.reset_index(level=0)

In [23]:
# Expand each list-valued column into one column per item, keyed by the
# "index" column, then move that key back out of the index into a column.
flattened_hyperparameters_df = pd.DataFrame(
    results_df["hyperparameters"].to_list(),
    columns=["num_env", "num_episodes", "learning_rate", "hidden_layers"],
    index=results_df["index"],
).reset_index(level=0)

flattened_results_df = pd.DataFrame(
    results_df["results"].to_list(),
    columns=[
        "min_reward",
        "max_reward",
        "reward_variance",
        "mean_reward",
        "mean_actor_loss",
        "mean_critic_loss",
        "mean_entropy_loss",
        "mean_overall_loss",
    ],
    index=results_df["index"],
).reset_index(level=0)

In [24]:
# Attach the flattened hyperparameter and result columns to the main frame.
# Both sides of each join carry an "index" column, so the left-hand copy is
# renamed with a suffix ("index_p", then "index_r") to avoid a name clash.
preprocessed_results_df = (
    results_df
    .join(flattened_hyperparameters_df, on="index", lsuffix="_p")
    .join(flattened_results_df, on="index", lsuffix="_r")
)
preprocessed_results_df.head(3)

Unnamed: 0,index_p,datetime,hyperparameters,results,wall_time,index_r,num_env,num_episodes,learning_rate,hidden_layers,index,min_reward,max_reward,reward_variance,mean_reward,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,0,20210407-003658,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:14.9, max_reward:119.6, reward_v...",127.93577520200004,0,num_env:6,num_episodes:50000,learning_rate:0.001,"hidden_layers:(32, 32)",0,min_reward:14.9,max_reward:119.6,reward_variance:25.978005235198488,mean_reward:72.66199999999999,mean_actor_loss:-0.10722807213896826,mean_critic_loss:2.42104439533448,mean_entropy_loss:-3.22899555683136,mean_overall_loss:2.3106003895982283
1,1,20210407-003658,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:16.5, max_reward:148.4, reward_v...",125.21537914199996,1,num_env:6,num_episodes:50000,learning_rate:0.001,"hidden_layers:(64, 64)",1,min_reward:16.5,max_reward:148.4,reward_variance:30.42119583448356,mean_reward:90.462,mean_actor_loss:-0.10751617150514116,mean_critic_loss:2.086534028534334,mean_entropy_loss:-3.17665771484375,mean_overall_loss:1.9758377702338388
2,2,20210407-003658,"[num_env:6, num_episodes:50000, learning_rat...","[min_reward:15.4, max_reward:142.1, reward_v...",128.042971476,2,num_env:6,num_episodes:50000,learning_rate:0.001,"hidden_layers:(128, 128)",2,min_reward:15.4,max_reward:142.1,reward_variance:27.70934687068607,mean_reward:98.764,mean_actor_loss:-0.1073100725279418,mean_critic_loss:1.8928728663544505,mean_entropy_loss:-3.1673620653152468,mean_overall_loss:1.7824184411410824


In [25]:
# Remove the duplicated key columns and the list columns that have already been flattened
preprocessed_results_df = preprocessed_results_df.drop(
    ["index_r", "index_p", "hyperparameters", "results"],
    axis=1,
)
preprocessed_results_df.head(2)

Unnamed: 0,datetime,wall_time,num_env,num_episodes,learning_rate,hidden_layers,index,min_reward,max_reward,reward_variance,mean_reward,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,20210407-003658,127.93577520200004,num_env:6,num_episodes:50000,learning_rate:0.001,"hidden_layers:(32, 32)",0,min_reward:14.9,max_reward:119.6,reward_variance:25.978005235198488,mean_reward:72.66199999999999,mean_actor_loss:-0.10722807213896826,mean_critic_loss:2.42104439533448,mean_entropy_loss:-3.22899555683136,mean_overall_loss:2.3106003895982283
1,20210407-003658,125.21537914199996,num_env:6,num_episodes:50000,learning_rate:0.001,"hidden_layers:(64, 64)",1,min_reward:16.5,max_reward:148.4,reward_variance:30.42119583448356,mean_reward:90.462,mean_actor_loss:-0.10751617150514116,mean_critic_loss:2.086534028534334,mean_entropy_loss:-3.17665771484375,mean_overall_loss:1.9758377702338388


In [26]:
# Column order for readability: run identifiers, hyperparameters, rewards, losses
_columns = [
    "index",
    "datetime",
    "wall_time",
    "num_env",
    "num_episodes",
    "learning_rate",
    "hidden_layers",
    "min_reward",
    "max_reward",
    "mean_reward",
    "reward_variance",
    "mean_actor_loss",
    "mean_critic_loss",
    "mean_entropy_loss",
    "mean_overall_loss",
]

preprocessed_results_df = preprocessed_results_df[_columns]
preprocessed_results_df.head(1)

Unnamed: 0,index,datetime,wall_time,num_env,num_episodes,learning_rate,hidden_layers,min_reward,max_reward,mean_reward,reward_variance,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,0,20210407-003658,127.93577520200004,num_env:6,num_episodes:50000,learning_rate:0.001,"hidden_layers:(32, 32)",min_reward:14.9,max_reward:119.6,mean_reward:72.66199999999999,reward_variance:25.978005235198488,mean_actor_loss:-0.10722807213896826,mean_critic_loss:2.42104439533448,mean_entropy_loss:-3.22899555683136,mean_overall_loss:2.3106003895982283


In [27]:
# Sanity check: the row count should equal the number of parsed records
preprocessed_results_df.shape

(54, 15)

In [28]:
# Every cell still has the form "name:value"; keep only the part after the label
for column in (
    "min_reward",
    "max_reward",
    "mean_reward",
    "reward_variance",
    "mean_actor_loss",
    "mean_critic_loss",
    "mean_entropy_loss",
    "mean_overall_loss",
    "num_env",
    "num_episodes",
    "learning_rate",
    "hidden_layers",
):
    preprocessed_results_df[column] = preprocessed_results_df[column].str.split(":").str.get(1)

preprocessed_results_df.head(3)

Unnamed: 0,index,datetime,wall_time,num_env,num_episodes,learning_rate,hidden_layers,min_reward,max_reward,mean_reward,reward_variance,mean_actor_loss,mean_critic_loss,mean_entropy_loss,mean_overall_loss
0,0,20210407-003658,127.93577520200004,6,50000,0.001,"(32, 32)",14.9,119.6,72.66199999999999,25.978005235198488,-0.1072280721389682,2.42104439533448,-3.22899555683136,2.3106003895982283
1,1,20210407-003658,125.21537914199996,6,50000,0.001,"(64, 64)",16.5,148.4,90.462,30.42119583448356,-0.1075161715051411,2.086534028534334,-3.17665771484375,1.9758377702338388
2,2,20210407-003658,128.042971476,6,50000,0.001,"(128, 128)",15.4,142.1,98.764,27.70934687068607,-0.1073100725279418,1.8928728663544503,-3.1673620653152468,1.7824184411410824


In [29]:
# Write the cleaned results to the configured CSV file, without the row index
preprocessed_results_df.to_csv(path_or_buf=RESULTS_CSV, index=False)