# Merging datasets

## Global imports and variables

In [1]:
# Import for interactive notebook (see:
# https://ipywidgets.readthedocs.io/en/stable/examples/Using%20Interact.html)
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import Layout

# Import to list files in directories
import glob

# Import for regular expressions
import re

# Imports for path operations
import os
import os.path

# For date operations
from datetime import datetime

import pandas as pd
# Never truncate cell contents when rendering DataFrames. ``None`` is the
# supported "no limit" value; the older ``-1`` sentinel was deprecated in
# pandas 1.0 and is rejected by current releases.
pd.set_option('display.max_colwidth', None)
import matplotlib.pyplot as plt

import numpy as np
import configparser

# import jtplot module in notebook
from jupyterthemes import jtplot

# Choose which notebook theme the plotting style is inherited from.
# Available themes:
# onedork | grade3 | oceans16 | chesterish | monokai | solarizedl | solarizedd
jtplot.style(theme='onedork')


In [2]:
# Root directory that holds one sub-directory per experiment result.
# NOTE: this is an absolute, machine-specific path; adjust it before running
# the notebook on another machine. The cells below glob its children and
# write the merged CSVs underneath it.
RESULTS_DIR = "/Users/gomerudo/workspace/thesis_results"

def rettext(text):
    """Return ``text`` unchanged (identity helper)."""
    return text

# Shared widget layout: each form item takes half of the available width.
form_item_layout = Layout(width="50%")

## Selecting the desired results

In [4]:
import os.path
# Build one checkbox per entry found directly under RESULTS_DIR. The
# checkboxes are keyed by the entry's full path so that later cells can map a
# ticked box back to the directory it stands for.
results_sorted = sorted(glob.glob("{dir}/*".format(dir=RESULTS_DIR)))
dict_widgets = {
    result: widgets.Checkbox(
        value=False,
        description=os.path.basename(result),
        disabled=False,
    )
    for result in results_sorted
}
# Same checkboxes in display order (dicts preserve insertion order).
list_widgets = list(dict_widgets.values())

# Lay the checkboxes out in two columns; when the count is odd the extra
# checkbox goes to the right-hand column.
half = len(results_sorted) // 2

left_box = widgets.VBox(list_widgets[:half])
right_box = widgets.VBox(list_widgets[half:])
display(widgets.HBox([left_box, right_box]))


HBox(children=(VBox(children=(Checkbox(value=False, description='27969'), Checkbox(value=False, description='2…

In [4]:
# Run this cell to untick every checkbox created above.
for checkbox in dict_widgets.values():
    checkbox.value = False

## Do the merge

In [6]:
# Collect the result directories whose checkbox is ticked, in sorted order.
selected_dirs = sorted(
    key for key, widget in dict_widgets.items() if widget.value
)

# Directory that receives the merged CSV files. Other targets that have been
# used with this cell: "{root}/summary_chained" and
# "{root}/summary_multibranch".
target_dir = "{root}/dqn_chained".format(root=RESULTS_DIR)


def _merge_summary_csv(filename, directories):
    """Concatenate ``<directory>/summary/<filename>`` over all directories.

    Args:
        filename: Name of the CSV file inside each ``summary`` folder.
        directories: Result directories to read from, in the desired order.

    Returns:
        A single DataFrame with the rows of every file stacked in order and a
        fresh 0..n-1 index; an empty DataFrame when ``directories`` is empty.

    Raises:
        FileNotFoundError: If one of the expected CSV files does not exist.
    """
    frames = [
        pd.read_csv(os.path.join(directory, "summary", filename))
        for directory in directories
    ]
    if not frames:
        # pd.concat raises on an empty list; keep the "nothing selected"
        # outcome an empty frame instead.
        return pd.DataFrame()
    # One concat over all frames replaces the repeated DataFrame.append
    # calls (removed in pandas 2.0, and quadratic when used in a loop).
    return pd.concat(frames, ignore_index=True)


# 5 dataframes. All reads happen before anything is written, so a missing
# input file aborts the cell without leaving a partial merge on disk.
actions_dist_df = _merge_summary_csv("actions_dist.csv", selected_dirs)
episodes_stats_df = _merge_summary_csv("episodes_stats.csv", selected_dirs)
# No progress file is merged by this cell; the name is kept defined as an
# empty frame.
progress_df = pd.DataFrame()
steps_stats_df = _merge_summary_csv("steps_stats.csv", selected_dirs)
trails_stats_df = _merge_summary_csv("trials_stats.csv", selected_dirs)

# to_csv does not create missing parent directories.
os.makedirs(target_dir, exist_ok=True)

actions_dist_df.to_csv(
    os.path.join(target_dir, "actions_dist.csv"), index=False
)
episodes_stats_df.to_csv(
    os.path.join(target_dir, "episodes_stats.csv"), index=False
)
steps_stats_df.to_csv(
    os.path.join(target_dir, "steps_stats.csv"), index=False
)
trails_stats_df.to_csv(
    os.path.join(target_dir, "trials_stats.csv"), index=False
)