In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt

import os
import sys

# Make the sibling "Source" directory importable so the project packages
# imported below can be resolved.
# The path is built with os.path.join so the separator matches the host OS:
# a hard-coded "..\\Source" only resolves on Windows, because elsewhere the
# backslash is an ordinary filename character. On Windows this produces the
# exact same string as before.
sys.path.append(os.path.join("..", "Source"))

import numpy as np
from Environments.CleanBotEnv import CleanBotEnv
from Models.TableModel import TableModel
from Methods.MonteCarlo import AveragingMC
from Methods.Policies import EpsilonGreedyPolicy
from Utilities.Eval import StatsLogger, ScrollingPlot
from Experiments.CleanBotExperiments import *

In [None]:
class TestingStats:
    """Mutable record of averaged metrics; every field is initialised to 0.0."""

    def __init__(self):
        # Chained assignment binds its targets left to right, so the attributes
        # are created in the order: training_avg_reward, training_avg_rms,
        # testing_avg_reward.
        self.training_avg_reward = self.training_avg_rms = self.testing_avg_reward = 0.0
        
# Seed NumPy's global RNG before any experiment object is constructed.
np.random.seed(643674)

# Experiment constructors, in the order they are instantiated and plotted.
constructors = [AlphaMCArrayModel, AlphaMC1ConvKerasModel, Sarsa1ConvKerasModel]

# experiments: one (experiment, trainingStats, testingStat, testingStats) tuple
#              per constructor.
# figures:     one figure description per experiment, in the same order.
experiments = []
figures = []

for constructor in constructors:
    experiment = constructor()

    # One logger follows the method's own stats object, the other follows a
    # fresh TestingStats record.
    trainingStats = StatsLogger(experiment.method.stats, max_length=100000)
    testingStat = TestingStats()
    testingStats = StatsLogger(testingStat, max_length=10000)
    experiments.append((experiment, trainingStats, testingStat, testingStats))

    # Each figure reads from the testing logger and shows two series:
    # training_avg_reward with colour code "b" and testing_avg_reward with "g".
    figures.append({
        "source": testingStats,
        "plots": [
            {"stat": stat_name, "color": colour}
            for stat_name, colour in (
                ("training_avg_reward", "b"),
                ("testing_avg_reward", "g"),
            )
        ],
    })

In [None]:
# Build the plot helper from the figure descriptions collected in `figures`;
# the training loop calls its update_plot() method periodically.
plotHelper = ScrollingPlot(figures)

In [None]:
# Cadences, counted in episodes: how often the plot is refreshed and how often
# each experiment is validated, plus the total number of episodes to run.
plot_upate_steps = 201
testing_update_steps = 50
episode_count = 50001

try:
    for i in range(episode_count):
        # Snapshot the global NumPy RNG state once per episode and restore it
        # before each experiment, so every experiment begins the episode from
        # the same RNG state.
        random_state = np.random.get_state()
        for experiment, trainingStats, testingStat, testingStats in experiments:
            np.random.set_state(random_state)
            experiment.method.run_episode()
            trainingStats.append(experiment.method.stats)

            # Only every testing_update_steps-th episode records a checkpoint.
            if (i + 1) % testing_update_steps != 0:
                continue

            # Average the most recent episode rewards (at most
            # testing_update_steps of them), then validate and log the record.
            testingStat.training_avg_reward = np.average(
                trainingStats.data["episode_reward"][-testing_update_steps:]
            )
            testingStat.testing_avg_reward = experiment.validate()
            testingStats.append(testingStat)

        # Refresh the plot every plot_upate_steps-th episode.
        if (i + 1) % plot_upate_steps == 0:
            plotHelper.update_plot()
except KeyboardInterrupt:
    # Interrupting the kernel ends the run without a traceback; the message
    # text is reproduced verbatim.
    print("Keyborad interrupt")