In [1]:
from bindsnet.network import Network
from bindsnet.pipeline import EnvironmentPipeline
from bindsnet.learning import MSTDP, MSTDPET, PostPre
from bindsnet.encoding import bernoulli
from bindsnet.network.topology import Connection
from bindsnet.environment import GymEnvironment
from bindsnet.network.nodes import Input, LIFNodes
from bindsnet.pipeline.action import select_softmax

In [6]:
# Network 1: the hidden -> output weights use the PostPre update rule
# (this run is labelled "STDP" in the results).
network1 = Network(dt=1.0)

# Neuron populations: 80 * 80 inputs, 200 hidden LIF neurons, 4 output LIF
# neurons. Every population is created with traces=True.
inpt = Input(n=80 * 80, shape=[1, 1, 1, 80, 80], traces=True)
middle = LIFNodes(n=200, traces=True)
out = LIFNodes(n=4, refrac=0, traces=True)

# Synapses. No update_rule is passed for input -> hidden; hidden -> output is
# the only connection that is given one.
inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
middle_out = Connection(
    source=middle, target=out,
    wmin=0, wmax=1,
    update_rule=PostPre, nu=1e-1, norm=0.5 * middle.n,
)

# Register the populations first, then the synapses, under the names that the
# pipeline refers to.
for layer_name, layer in [
    ("Input Layer", inpt),
    ("Hidden Layer", middle),
    ("Output Layer", out),
]:
    network1.add_layer(layer, name=layer_name)
for synapse, pre_name, post_name in [
    (inpt_middle, "Input Layer", "Hidden Layer"),
    (middle_out, "Hidden Layer", "Output Layer"),
]:
    network1.add_connection(synapse, source=pre_name, target=post_name)

# Breakout environment, reset once before it is handed to the pipeline.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Assemble the pipeline; the options are forwarded as keyword arguments.
pipeline_options = dict(
    encoding=bernoulli,
    action_function=select_softmax,
    output="Output Layer",
    time=100,
    history_length=5,
    delta=1,
    render_interval=1,
)
environment_pipeline = EnvironmentPipeline(network1, environment, **pipeline_options)


def run_pipeline(pipeline, episode_count):
    """Play ``episode_count`` episodes and collect each episode's summed reward.

    For every episode the pipeline's state variables are reset, then
    ``pipeline.env_step()`` and ``pipeline.step(result)`` are called in turn
    until element 2 of the step result is truthy. Element 1 of every step
    result is added to the episode's running total, which is printed and
    recorded when the episode ends.

    Args:
        pipeline: Object exposing ``reset_state_variables()``, ``env_step()``
            and ``step(result)``.
        episode_count: Number of episodes to play.

    Returns:
        A list holding one total reward per episode, in play order.
    """
    episode_totals = []
    for i in range(episode_count):
        pipeline.reset_state_variables()
        total_reward = 0
        while True:
            result = pipeline.env_step()
            pipeline.step(result)
            total_reward += result[1]
            # Index 2 of the step result is the episode-finished flag.
            if result[2]:
                break
        episode_totals.append(total_reward)
        print(f"Episode {i} total reward:{total_reward}")
    return episode_totals

In [7]:
# Run 1000 episodes on the current `environment_pipeline` and keep the
# per-episode totals for the STDP comparison.
print("Training: ")
plot_reward_STDP = run_pipeline(environment_pipeline, 1000)

0 total reward:3.0
Episode 311 total reward:4.0
Episode 312 total reward:0.0
Episode 313 total reward:2.0
Episode 314 total reward:2.0
Episode 315 total reward:1.0
Episode 316 total reward:0.0
Episode 317 total reward:0.0
Episode 318 total reward:1.0
Episode 319 total reward:0.0
Episode 320 total reward:0.0
Episode 321 total reward:0.0
Episode 322 total reward:0.0
Episode 323 total reward:0.0
Episode 324 total reward:1.0
Episode 325 total reward:1.0
Episode 326 total reward:2.0
Episode 327 total reward:0.0
Episode 328 total reward:1.0
Episode 329 total reward:0.0
Episode 330 total reward:2.0
Episode 331 total reward:2.0
Episode 332 total reward:0.0
Episode 333 total reward:1.0
Episode 334 total reward:1.0
Episode 335 total reward:0.0
Episode 336 total reward:0.0
Episode 337 total reward:0.0
Episode 338 total reward:0.0
Episode 339 total reward:0.0
Episode 340 total reward:0.0
Episode 341 total reward:2.0
Episode 342 total reward:0.0
Episode 343 total reward:0.0
Episode 344 total reward

In [2]:
# Network 2: same layout as the other runs, but the hidden -> output weights
# use the MSTDP update rule.
network2 = Network(dt=1.0)

# Neuron populations: 80 * 80 inputs, 200 hidden LIF neurons, 4 output LIF
# neurons. Every population is created with traces=True.
inpt = Input(n=80 * 80, shape=[1, 1, 1, 80, 80], traces=True)
middle = LIFNodes(n=200, traces=True)
out = LIFNodes(n=4, refrac=0, traces=True)

# Synapses. No update_rule is passed for input -> hidden; hidden -> output is
# the only connection that is given one.
inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
middle_out = Connection(
    source=middle, target=out,
    wmin=0, wmax=1,
    update_rule=MSTDP, nu=1e-1, norm=0.5 * middle.n,
)

# Register the populations first, then the synapses, under the names that the
# pipeline refers to.
for layer_name, layer in [
    ("Input Layer", inpt),
    ("Hidden Layer", middle),
    ("Output Layer", out),
]:
    network2.add_layer(layer, name=layer_name)
for synapse, pre_name, post_name in [
    (inpt_middle, "Input Layer", "Hidden Layer"),
    (middle_out, "Hidden Layer", "Output Layer"),
]:
    network2.add_connection(synapse, source=pre_name, target=post_name)

# Breakout environment, reset once before it is handed to the pipeline.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Assemble the pipeline; the options are forwarded as keyword arguments.
# reward_plot=True is passed only for this network.
pipeline_options = dict(
    encoding=bernoulli,
    action_function=select_softmax,
    output="Output Layer",
    time=100,
    history_length=5,
    delta=1,
    render_interval=1,
    reward_plot=True,
)
environment_pipeline = EnvironmentPipeline(network2, environment, **pipeline_options)


def run_pipeline(pipeline, episode_count):
    """Play ``episode_count`` episodes and collect each episode's summed reward.

    For every episode the pipeline's state variables are reset, then
    ``pipeline.env_step()`` and ``pipeline.step(result)`` are called in turn
    until element 2 of the step result is truthy. Element 1 of every step
    result is added to the episode's running total, which is printed and
    recorded when the episode ends.

    Args:
        pipeline: Object exposing ``reset_state_variables()``, ``env_step()``
            and ``step(result)``.
        episode_count: Number of episodes to play.

    Returns:
        A list holding one total reward per episode, in play order.
    """
    episode_totals = []
    for i in range(episode_count):
        pipeline.reset_state_variables()
        total_reward = 0
        while True:
            result = pipeline.env_step()
            pipeline.step(result)
            total_reward += result[1]
            # Index 2 of the step result is the episode-finished flag.
            if result[2]:
                break
        episode_totals.append(total_reward)
        print(f"Episode {i} total reward:{total_reward}")
    return episode_totals

In [3]:
# Run 10 episodes on the current `environment_pipeline` and keep the
# per-episode totals for the MSTDP comparison.
# NOTE: the STDP and MSTDPET runs use 1000 episodes, so this list is much
# shorter than the other two.
print("Training: ")
plot_reward_MSTDP = run_pipeline(environment_pipeline, 10)

Training: 
Episode 0 total reward:3.0
Episode 1 total reward:2.0
Episode 2 total reward:2.0
Episode 3 total reward:1.0
Episode 4 total reward:1.0
Episode 5 total reward:0.0
Episode 6 total reward:2.0
Episode 7 total reward:0.0
Episode 8 total reward:1.0
Episode 9 total reward:1.0


In [9]:
# Show the reward list stored on the pipeline object (presumably one entry
# per finished episode; verify against the pipeline implementation).
# NOTE(review): `environment_pipeline` is rebound by every setup cell, so this
# displays the list of whichever pipeline was built most recently.
environment_pipeline.reward_list

[3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0]

In [4]:
# Network 3: same layout as the other runs, but the hidden -> output weights
# use the MSTDPET update rule.
network3 = Network(dt=1.0)

# Neuron populations: 80 * 80 inputs, 200 hidden LIF neurons, 4 output LIF
# neurons. Every population is created with traces=True.
inpt = Input(n=80 * 80, shape=[1, 1, 1, 80, 80], traces=True)
middle = LIFNodes(n=200, traces=True)
out = LIFNodes(n=4, refrac=0, traces=True)

# Synapses. No update_rule is passed for input -> hidden; hidden -> output is
# the only connection that is given one.
inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
middle_out = Connection(
    source=middle, target=out,
    wmin=0, wmax=1,
    update_rule=MSTDPET, nu=1e-1, norm=0.5 * middle.n,
)

# Register the populations first, then the synapses, under the names that the
# pipeline refers to.
for layer_name, layer in [
    ("Input Layer", inpt),
    ("Hidden Layer", middle),
    ("Output Layer", out),
]:
    network3.add_layer(layer, name=layer_name)
for synapse, pre_name, post_name in [
    (inpt_middle, "Input Layer", "Hidden Layer"),
    (middle_out, "Hidden Layer", "Output Layer"),
]:
    network3.add_connection(synapse, source=pre_name, target=post_name)

# Breakout environment, reset once before it is handed to the pipeline.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Assemble the pipeline; the options are forwarded as keyword arguments.
pipeline_options = dict(
    encoding=bernoulli,
    action_function=select_softmax,
    output="Output Layer",
    time=100,
    history_length=5,
    delta=1,
    render_interval=1,
)
environment_pipeline = EnvironmentPipeline(network3, environment, **pipeline_options)


def run_pipeline(pipeline, episode_count):
    """Play ``episode_count`` episodes and collect each episode's summed reward.

    For every episode the pipeline's state variables are reset, then
    ``pipeline.env_step()`` and ``pipeline.step(result)`` are called in turn
    until element 2 of the step result is truthy. Element 1 of every step
    result is added to the episode's running total, which is printed and
    recorded when the episode ends.

    Args:
        pipeline: Object exposing ``reset_state_variables()``, ``env_step()``
            and ``step(result)``.
        episode_count: Number of episodes to play.

    Returns:
        A list holding one total reward per episode, in play order.
    """
    episode_totals = []
    for i in range(episode_count):
        pipeline.reset_state_variables()
        total_reward = 0
        while True:
            result = pipeline.env_step()
            pipeline.step(result)
            total_reward += result[1]
            # Index 2 of the step result is the episode-finished flag.
            if result[2]:
                break
        episode_totals.append(total_reward)
        print(f"Episode {i} total reward:{total_reward}")
    return episode_totals

In [5]:
# Run 1000 episodes on the current `environment_pipeline` and keep the
# per-episode totals for the MSTDPET comparison.
print("Training: ")
plot_reward_MSTDPET = run_pipeline(environment_pipeline, 1000)

0 total reward:0.0
Episode 311 total reward:2.0
Episode 312 total reward:0.0
Episode 313 total reward:1.0
Episode 314 total reward:4.0
Episode 315 total reward:2.0
Episode 316 total reward:0.0
Episode 317 total reward:0.0
Episode 318 total reward:1.0
Episode 319 total reward:3.0
Episode 320 total reward:0.0
Episode 321 total reward:1.0
Episode 322 total reward:0.0
Episode 323 total reward:3.0
Episode 324 total reward:0.0
Episode 325 total reward:1.0
Episode 326 total reward:1.0
Episode 327 total reward:0.0
Episode 328 total reward:0.0
Episode 329 total reward:0.0
Episode 330 total reward:0.0
Episode 331 total reward:0.0
Episode 332 total reward:0.0
Episode 333 total reward:0.0
Episode 334 total reward:2.0
Episode 335 total reward:0.0
Episode 336 total reward:3.0
Episode 337 total reward:1.0
Episode 338 total reward:0.0
Episode 339 total reward:0.0
Episode 340 total reward:2.0
Episode 341 total reward:0.0
Episode 342 total reward:2.0
Episode 343 total reward:1.0
Episode 344 total reward

In [8]:
import pandas as pd
import numpy as np

# Collect the per-episode scores of the three learning rules in one table and
# persist them as CSV.
#
# Each list is wrapped in a Series because the runs do not all have the same
# number of episodes. A dict of plain lists with unequal lengths makes
# pd.DataFrame raise ValueError, whereas Series are aligned on their index and
# the shorter runs are padded with NaN (written as empty CSV fields).
# Anything that reads this file should therefore use NaN-aware reductions.
d = {
    'STDP': pd.Series(plot_reward_STDP, dtype=float),
    'MSTDP': pd.Series(plot_reward_MSTDP, dtype=float),
    'MSTDPET': pd.Series(plot_reward_MSTDPET, dtype=float),
}
df = pd.DataFrame(data=d)
df.to_csv('RL_breakout_results.csv', index=False)

In [1]:
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()

In [27]:
# Score per episode for the STDP run, drawn as one green bar per episode.
stdp_episode_ids = np.arange(len(plot_reward_STDP))
p = figure(height=300, x_axis_label='Episodes', y_axis_label='Score')
p.vbar(x=stdp_episode_ids, top=plot_reward_STDP, color='green', legend_label='STDP')
show(p)

In [28]:
# Score per episode for the MSTDP run, drawn as one blue bar per episode.
mstdp_episode_ids = np.arange(len(plot_reward_MSTDP))
p = figure(height=300, x_axis_label='Episodes', y_axis_label='Score')
p.vbar(x=mstdp_episode_ids, top=plot_reward_MSTDP, color='blue', legend_label='MSTDP')
show(p)

In [29]:
# Score per episode for the MSTDPET run, drawn as one red bar per episode.
mstdpet_episode_ids = np.arange(len(plot_reward_MSTDPET))
p = figure(height=300, x_axis_label='Episodes', y_axis_label='Score')
p.vbar(x=mstdpet_episode_ids, top=plot_reward_MSTDPET, color='red', legend_label='MSTDPET')
show(p)

In [2]:
import numpy as np
import pandas as pd

In [6]:
# Reload the saved scores and expose each learning rule's column under the
# variable name that the plotting cells use.
d = pd.read_csv('RL_breakout_results.csv')
plot_reward_STDP, plot_reward_MSTDP, plot_reward_MSTDPET = (
    d[column] for column in ('STDP', 'MSTDP', 'MSTDPET')
)

In [7]:
# Average score per learning rule, shown as one bar per method.
#
# np.nanmean is used instead of np.average so that NaN entries (for example
# blank CSV cells belonging to a run with fewer episodes) are ignored rather
# than turning that method's average into NaN. For NaN-free data the result
# is the same as the plain mean.
methods = ['STDP', 'MSTDP', 'MSTDPET']
avg_score = np.array([
    np.nanmean(plot_reward_STDP),
    np.nanmean(plot_reward_MSTDP),
    np.nanmean(plot_reward_MSTDPET),
])
p = figure(x_range = methods, height=300, x_axis_label = 'Methods', y_axis_label = 'Average Score')
p.vbar(x = methods, top = avg_score, width = 0.5, color = 'Orange')
show(p)

In [13]:
# Highest single-episode score per learning rule, shown as one bar per method.
#
# np.nanmax is used instead of np.max so that NaN entries (for example blank
# CSV cells belonging to a run with fewer episodes) are ignored rather than
# propagating NaN into the bar height. For NaN-free data the result is the
# same as the plain maximum.
methods = ['STDP', 'MSTDP', 'MSTDPET']
max_score = np.array([
    np.nanmax(plot_reward_STDP),
    np.nanmax(plot_reward_MSTDP),
    np.nanmax(plot_reward_MSTDPET),
])
p = figure(x_range = methods, height=300, x_axis_label = 'Methods', y_axis_label = 'Highest Score')
p.vbar(x = methods, top = max_score, width = 0.5, color = 'navy')
show(p)