In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as ipw

In [10]:
# Inject a CSS rule that forces elements with class `container` to full width
# (presumably to give the side-by-side figure layouts below more room - confirm
# it still applies to the front end in use).
# `display` and `HTML` come from the public `IPython.display` module;
# `IPython.core.display` is a deprecated import location for `display`.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [11]:
# Number of leading CSV rows that every analysis cell below drops via
# `[cutoff:]` before computing statistics or drawing plots.
cutoff = 250

## Discrete

### Effect of learning rate

In [12]:
# Path prefix of the discrete learning-rate reward CSVs; the next cell appends
# a learning-rate value and ".csv" to it to build each file name.
base = "./discrete/learning_rate/lr"

In [14]:
# Learning-rate sweep for the runs stored under `base`. Builds three figures:
#   fig  - box plot of the "Value" column, one box per learning rate
#   fig2 - bar chart with the proportion of rows whose Value is >= 200
#   fig3 - moving average of "Value" plotted against "Step"
fig = go.FigureWidget()
fig3 = go.FigureWidget()

values = ["1e-1", "1e-2", "1e-3", "1e-4", "1e-5"]
percentages = []
window = 50  # size of the rolling-mean window, in CSV rows
for value in values:
    # Each run is read from "<base><value>.csv"; its first `cutoff` rows are dropped.
    rewards = pd.read_csv(base + value + ".csv")[cutoff:]
    # Share of the remaining rows with Value >= 200 (labelled "succesful
    # landings" in the bar chart below).
    percentages.append(len(rewards[rewards["Value"] >= 200]) / len(rewards))
    fig.add_trace(go.Box(
        y=rewards["Value"],
        name=value,
        boxpoints="outliers",
        marker_color='rgb(7,40,89)',
        boxmean=True,  # also draw the mean
    ))
    # The rolling mean is NaN until a full window is available, so x and y are
    # both trimmed by `window` rows (by position) to keep them aligned.
    fig3.add_trace(go.Scatter(
        y=rewards["Value"].rolling(window=window).mean().iloc[window:],
        x=rewards["Step"].iloc[window:],
        name="lr-" + value,  # legend entry
    ))

fig2 = go.FigureWidget(data=[go.Bar(
    x=values, y=percentages,
    text=percentages,  # print each proportion on its bar
    textposition='auto',
)])
fig2.update_traces(marker_color='rgb(57,90,139)', marker_line_color='rgb(7,40,89)')

fig3.update_layout(
    title=f"Reward per episode (moving average with window of size {window})",
    yaxis_title="cummulative reward",
    xaxis_title="episodes"
)
fig2.update_layout(
    title="Proportion of succesful landings per learning rate",
    yaxis_title="proportion of succesful landings",
    xaxis_title="learning rate",
    showlegend=False
)
fig.update_layout(
    title="Reward distribution per learning rate",
    yaxis_title="cummulative reward",
    xaxis_title="learning rate",
    showlegend=False
)

# Last expression of the cell: box plot on the left, bar chart stacked above
# the reward curves on the right.
ipw.HBox([fig, ipw.VBox([fig2, fig3])])

HBox(children=(FigureWidget({
    'data': [{'boxmean': True,
              'boxpoints': 'outliers',
          …

### Effect of epsilon decay

In [17]:
# Path prefix of the discrete epsilon-decay reward CSVs; the next cell appends
# a decrement value and ".csv" to it to build each file name.
base = "./discrete/epsilon_decay/ed"

In [18]:
# Epsilon-decay sweep for the runs stored under `base`. Produces a box plot of
# "Value" (fig), a bar chart of the share of rows with Value >= 200 (fig2) and
# moving-average curves of "Value" over "Step" (fig3).
values = ["5e-2", "5e-3", "5e-4", "5e-5", "5e-6"]
window = 50
percentages = []
fig = go.FigureWidget()
fig3 = go.FigureWidget()

for value in values:
    # Read "<base><value>.csv" and discard its first `cutoff` rows.
    rewards = pd.read_csv(f"{base}{value}.csv")[cutoff:]

    reached_200 = rewards["Value"] >= 200
    percentages.append(len(rewards[reached_200]) / len(rewards))

    box_trace = go.Box(
        name=value,
        y=rewards["Value"],
        boxmean=True,  # also draw the mean
        boxpoints="outliers",
        marker_color='rgb(7,40,89)',
    )
    fig.add_trace(box_trace)

    # Trim both axes by `window` rows (by position) so they stay aligned.
    smoothed = rewards["Value"].rolling(window=window).mean()
    curve_trace = go.Scatter(
        name="decrement-" + value,  # legend entry
        x=rewards["Step"].iloc[window:],
        y=smoothed.iloc[window:],
    )
    fig3.add_trace(curve_trace)

bar_trace = go.Bar(x=values, y=percentages, text=percentages, textposition='auto')
fig2 = go.FigureWidget(data=[bar_trace])
fig2.update_traces(
    marker_color='rgb(57,90,139)',
    marker_line_color='rgb(7,40,89)',
)

fig.update_layout(
    title="Reward distribution per decrement value",
    xaxis_title="decrement value",
    yaxis_title="cummulative reward",
    showlegend=False,
)
fig2.update_layout(
    title="Proportion of succesful landings per decrement value",
    xaxis_title="decrement value",
    yaxis_title="proportion of succesful landings",
    showlegend=False,
)
fig3.update_layout(
    title=f"Reward per episode (moving average with window of size {window})",
    xaxis_title="episodes",
    yaxis_title="cummulative reward",
)

# Cell output: box plot on the left, bar chart above the curves on the right.
right_column = ipw.VBox([fig2, fig3])
ipw.HBox([fig, right_column])

HBox(children=(FigureWidget({
    'data': [{'boxmean': True,
              'boxpoints': 'outliers',
          …

### Effect of batch size

In [19]:
# Path prefix of the discrete batch-size reward CSVs; the next cell appends
# a batch size and ".csv" to it to build each file name.
base = "./discrete/batch_size/bz"

In [20]:
# Batch-size sweep for the runs stored under `base`. Produces a box plot of
# "Value" (fig), a bar chart of the share of rows with Value >= 200 (fig2) and
# moving-average curves of "Value" over "Step" (fig3).
values = ["32", "64", "128", "256", "512"]
window = 50
percentages = []
fig = go.FigureWidget()
fig3 = go.FigureWidget()

for value in values:
    # Read "<base><value>.csv" and discard its first `cutoff` rows.
    rewards = pd.read_csv(f"{base}{value}.csv")[cutoff:]

    reached_200 = rewards["Value"] >= 200
    percentages.append(len(rewards[reached_200]) / len(rewards))

    box_trace = go.Box(
        name=value,
        y=rewards["Value"],
        boxmean=True,  # also draw the mean
        boxpoints="outliers",
        marker_color='rgb(7,40,89)',
    )
    fig.add_trace(box_trace)

    # Trim both axes by `window` rows (by position) so they stay aligned.
    smoothed = rewards["Value"].rolling(window=window).mean()
    curve_trace = go.Scatter(
        name="batch size-" + value,  # legend entry
        x=rewards["Step"].iloc[window:],
        y=smoothed.iloc[window:],
    )
    fig3.add_trace(curve_trace)

bar_trace = go.Bar(x=values, y=percentages, text=percentages, textposition='auto')
fig2 = go.FigureWidget(data=[bar_trace])
fig2.update_traces(
    marker_color='rgb(57,90,139)',
    marker_line_color='rgb(7,40,89)',
)

fig.update_layout(
    title="Reward distribution per batch size",
    xaxis_title="batch size",
    yaxis_title="cummulative reward",
    showlegend=False,
)
fig2.update_layout(
    title="Proportion of succesful landings per batch size",
    xaxis_title="batch size",
    yaxis_title="proportion of succesful landings",
    showlegend=False,
)
fig3.update_layout(
    title=f"Reward per episode (moving average with window of size {window})",
    xaxis_title="episodes",
    yaxis_title="cummulative reward",
)

# Cell output: box plot on the left, bar chart above the curves on the right.
right_column = ipw.VBox([fig2, fig3])
ipw.HBox([fig, right_column])

HBox(children=(FigureWidget({
    'data': [{'boxmean': True,
              'boxpoints': 'outliers',
          …

## Continuous

### Effect of learning rate

In [21]:
# Path prefix of the continuous learning-rate reward CSVs; the next cell
# appends a learning-rate value and ".csv" to it to build each file name.
base = "./continuous/learning_rate/reward_per_episode_learning_rate_"

In [22]:
# Learning-rate sweep for the runs stored under `base`. Produces a box plot of
# "Value" (fig), a bar chart of the share of rows with Value >= 200 (fig2) and
# moving-average curves of "Value" over "Step" (fig3).
values = ["1e-1", "1e-2", "1e-3", "1e-4", "1e-5"]
window = 50
percentages = []
fig = go.FigureWidget()
fig3 = go.FigureWidget()

for value in values:
    # Read "<base><value>.csv" and discard its first `cutoff` rows.
    rewards = pd.read_csv(f"{base}{value}.csv")[cutoff:]

    reached_200 = rewards["Value"] >= 200
    percentages.append(len(rewards[reached_200]) / len(rewards))

    box_trace = go.Box(
        name=value,
        y=rewards["Value"],
        boxmean=True,  # also draw the mean
        boxpoints="outliers",
        marker_color='rgb(7,40,89)',
    )
    fig.add_trace(box_trace)

    # Trim both axes by `window` rows (by position) so they stay aligned.
    smoothed = rewards["Value"].rolling(window=window).mean()
    curve_trace = go.Scatter(
        name="lr-" + value,  # legend entry
        x=rewards["Step"].iloc[window:],
        y=smoothed.iloc[window:],
    )
    fig3.add_trace(curve_trace)

bar_trace = go.Bar(x=values, y=percentages, text=percentages, textposition='auto')
fig2 = go.FigureWidget(data=[bar_trace])
fig2.update_traces(
    marker_color='rgb(57,90,139)',
    marker_line_color='rgb(7,40,89)',
)

fig.update_layout(
    title="Reward distribution per learning rate",
    xaxis_title="learning rate",
    yaxis_title="cummulative reward",
    showlegend=False,
)
fig2.update_layout(
    title="Proportion of succesful landings per learning rate",
    xaxis_title="learning rate",
    yaxis_title="proportion of succesful landings",
    showlegend=False,
)
fig3.update_layout(
    title=f"Reward per episode (moving average with window of size {window})",
    xaxis_title="episodes",
    yaxis_title="cummulative reward",
)

# Cell output: box plot on the left, bar chart above the curves on the right.
right_column = ipw.VBox([fig2, fig3])
ipw.HBox([fig, right_column])

HBox(children=(FigureWidget({
    'data': [{'boxmean': True,
              'boxpoints': 'outliers',
          …

### Effect of epsilon decay

In [23]:
# Path prefix of the continuous epsilon-decay reward CSVs; the next cell
# appends a decrement value and ".csv" to it to build each file name.
base = "./continuous/epsilon_decay/reward_per_episode_"

In [24]:
# Epsilon-decay sweep for the runs stored under `base`. Produces a box plot of
# "Value" (fig), a bar chart of the share of rows with Value >= 200 (fig2) and
# moving-average curves of "Value" over "Step" (fig3).
values = ["1e-1", "1e-2", "1e-3", "1e-4", "1e-5"]
window = 50
percentages = []
fig = go.FigureWidget()
fig3 = go.FigureWidget()

for value in values:
    # Read "<base><value>.csv" and discard its first `cutoff` rows.
    rewards = pd.read_csv(f"{base}{value}.csv")[cutoff:]

    reached_200 = rewards["Value"] >= 200
    percentages.append(len(rewards[reached_200]) / len(rewards))

    box_trace = go.Box(
        name=value,
        y=rewards["Value"],
        boxmean=True,  # also draw the mean
        boxpoints="outliers",
        marker_color='rgb(7,40,89)',
    )
    fig.add_trace(box_trace)

    # Trim both axes by `window` rows (by position) so they stay aligned.
    smoothed = rewards["Value"].rolling(window=window).mean()
    curve_trace = go.Scatter(
        name="decrement-" + value,  # legend entry
        x=rewards["Step"].iloc[window:],
        y=smoothed.iloc[window:],
    )
    fig3.add_trace(curve_trace)

bar_trace = go.Bar(x=values, y=percentages, text=percentages, textposition='auto')
fig2 = go.FigureWidget(data=[bar_trace])
fig2.update_traces(
    marker_color='rgb(57,90,139)',
    marker_line_color='rgb(7,40,89)',
)

fig.update_layout(
    title="Reward distribution per decrement value",
    xaxis_title="decrement value",
    yaxis_title="cummulative reward",
    showlegend=False,
)
fig2.update_layout(
    title="Proportion of succesful landings per decrement value",
    xaxis_title="decrement value",
    yaxis_title="proportion of succesful landings",
    showlegend=False,
)
fig3.update_layout(
    title=f"Reward per episode (moving average with window of size {window})",
    xaxis_title="episodes",
    yaxis_title="cummulative reward",
)

# Cell output: box plot on the left, bar chart above the curves on the right.
right_column = ipw.VBox([fig2, fig3])
ipw.HBox([fig, right_column])

HBox(children=(FigureWidget({
    'data': [{'boxmean': True,
              'boxpoints': 'outliers',
          …

### Effect of batch size

In [25]:
# Path prefix of the continuous batch-size reward CSVs; the next cell appends
# a batch size and ".csv" to it to build each file name.
base = "./continuous/batch_size/reward_per_episode_batch_size_"

In [26]:
# Batch-size sweep for the runs stored under `base`. Produces a box plot of
# "Value" (fig), a bar chart of the share of rows with Value >= 200 (fig2) and
# moving-average curves of "Value" over "Step" (fig3).
values = ["32", "64", "128", "256", "512"]
window = 50
percentages = []
fig = go.FigureWidget()
fig3 = go.FigureWidget()

for value in values:
    # Read "<base><value>.csv" and discard its first `cutoff` rows.
    rewards = pd.read_csv(f"{base}{value}.csv")[cutoff:]

    reached_200 = rewards["Value"] >= 200
    percentages.append(len(rewards[reached_200]) / len(rewards))

    box_trace = go.Box(
        name=value,
        y=rewards["Value"],
        boxmean=True,  # also draw the mean
        boxpoints="outliers",
        marker_color='rgb(7,40,89)',
    )
    fig.add_trace(box_trace)

    # Trim both axes by `window` rows (by position) so they stay aligned.
    smoothed = rewards["Value"].rolling(window=window).mean()
    curve_trace = go.Scatter(
        name="batch size-" + value,  # legend entry
        x=rewards["Step"].iloc[window:],
        y=smoothed.iloc[window:],
    )
    fig3.add_trace(curve_trace)

bar_trace = go.Bar(x=values, y=percentages, text=percentages, textposition='auto')
fig2 = go.FigureWidget(data=[bar_trace])
fig2.update_traces(
    marker_color='rgb(57,90,139)',
    marker_line_color='rgb(7,40,89)',
)

fig.update_layout(
    title="Reward distribution per batch size",
    xaxis_title="batch size",
    yaxis_title="cummulative reward",
    showlegend=False,
)
fig2.update_layout(
    title="Proportion of succesful landings per batch size",
    xaxis_title="batch size",
    yaxis_title="proportion of succesful landings",
    showlegend=False,
)
fig3.update_layout(
    title=f"Reward per episode (moving average with window of size {window})",
    xaxis_title="episodes",
    yaxis_title="cummulative reward",
)

# Cell output: box plot on the left, bar chart above the curves on the right.
right_column = ipw.VBox([fig2, fig3])
ipw.HBox([fig, right_column])

HBox(children=(FigureWidget({
    'data': [{'boxmean': True,
              'boxpoints': 'outliers',
          …