In [None]:
import numpy as np
from IPython.core.display import display, HTML
# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import os
# Move one directory up before importing the project modules below.
# NOTE(review): the path is relative, so re-running this cell moves up one
# more level each time; run it exactly once per kernel session.
os.chdir('..')
import virl
# NOTE(review): `plot` imported here is re-bound by the `def plot(...)` later
# in this same cell, so after the cell runs `plot` no longer refers to
# helper_methods.plot.
from helper_methods import run, plot
from plot_helper import plot as eval_plot
from matplotlib import pyplot as plt
import pickle

def plot(all_rewards, title):
    """Draw a reward curve on its own matplotlib figure.

    NOTE(review): this definition re-binds the name ``plot`` that the same
    cell imports from ``helper_methods``; once it has run, ``plot`` is this
    two-argument function.

    Args:
        all_rewards: sequence of values handed to ``plt.plot`` (one point per
            entry, plotted against its index).
        title: text appended to the "Evaluate: " prefix of the figure title.
    """
    # Open a new figure so that calling this in a loop produces one chart per
    # call instead of stacking every curve (and only the last title) on the
    # same axes.
    plt.figure()
    plt.title("Evaluate: " + title)
    plt.xlabel("Episode")
    plt.ylabel("Total Reward")
    plt.plot(all_rewards)
    plt.show()

In [None]:
# Environments for every combination of the stochastic and noisy flags.
# Keys 0..3 come from the position in the list, so both dicts share indices.
stochastic_noisy = dict(enumerate([
    virl.Epidemic(stochastic=False, noisy=False),
    virl.Epidemic(stochastic=True, noisy=False),
    virl.Epidemic(stochastic=False, noisy=True),
    virl.Epidemic(stochastic=True, noisy=True),
]))
# Human-readable label for the environment stored under the same key.
stochastic_noisy_print = dict(enumerate([
    "stochastic=False, noisy=False",
    "stochastic=True, noisy=False",
    "stochastic=False, noisy=True",
    "stochastic=True, noisy=True",
]))


In [None]:
def mean_std(all_total_rewards, all_rewards, title):
    """Print the mean and standard deviation of two reward collections.

    Args:
        all_total_rewards: array-like of total rewards; reduced with
            ``np.mean`` and ``np.std``.
        all_rewards: array-like of rewards; reduced the same way over all of
            its elements.
        title: heading printed above the statistics.

    Returns:
        None. The statistics are written to standard output only.
    """
    # np.std uses its default ddof=0, i.e. the population standard deviation.
    total_reward_sd = np.std(all_total_rewards)
    total_reward_mean = np.mean(all_total_rewards)
    reward_sd = np.std(all_rewards)
    reward_mean = np.mean(all_rewards)

    print("---")
    print(title)
    print("Total reward standard deviation = " + str(total_reward_sd))
    print("Total reward mean = " + str(total_reward_mean))
    print("Reward standard deviation = " + str(reward_sd))
    # This line reports the mean of all_rewards, so it is labelled "Reward"
    # to match the standard-deviation line above it.
    print("Reward mean = " + str(reward_mean))

## Deterministic

In [None]:
from DeterministicAgent import DeterministicAgent
# The first cell re-binds `plot` to a two-argument reward plotter, so the
# four-argument agent plot from helper_methods is imported under its own name
# here; calling `plot(agent, states, rewards, title)` would raise TypeError.
from helper_methods import plot as agent_plot

actions = ["no intervention", "impose a full lockdown", "implement track & trace", "enforce social distancing and face masks"]
stochastic = [False, True]
noisy = [False, True]


# One run per (stochastic, noisy, action) combination; the action index is
# what the agent receives, the text is its label.
for is_stochastic in stochastic:
    for is_noisy in noisy:
        for action, action_text in enumerate(actions):
            env = virl.Epidemic(stochastic=is_stochastic, noisy=is_noisy)
            agent = DeterministicAgent(env, action, action_text)
            states, rewards = run(agent)
            title = ", Stochastic = " + str(is_stochastic) + ", Noisy = " + str(is_noisy)
            agent_plot(agent, states, rewards, title)

# Repeat every action on problem ids 0..9 with both flags switched on.
for i in range(0, 10):
    for action, action_text in enumerate(actions):
        env = virl.Epidemic(stochastic=True, noisy=True, problem_id=i)
        agent = DeterministicAgent(env, action, action_text)
        states, rewards = run(agent)
        title = ", Problem id=" + str(i)
        agent_plot(agent, states, rewards, title)

## Random

In [None]:
from RandomAgent import RandomAgent
# The first cell re-binds `plot` to a two-argument reward plotter, so the
# agent plot from helper_methods is imported under its own name here.
from helper_methods import plot as agent_plot

actions = ["no intervention", "impose a full lockdown", "implement track & trace", "enforce social distancing and face masks"]
stochastic = [False, True]
noisy = [False, True]

# One run per (stochastic, noisy) combination.
for is_stochastic in stochastic:
    for is_noisy in noisy:
        env = virl.Epidemic(stochastic=is_stochastic, noisy=is_noisy)
        agent = RandomAgent(env, actions)
        states, rewards = run(agent)
        # Pass a title like every other agent plot call in this notebook does,
        # so each chart states which flag combination it shows.
        title = ", Stochastic = " + str(is_stochastic) + ", Noisy = " + str(is_noisy)
        agent_plot(agent, states, rewards, title)

# Repeat on problem ids 0..9 with both flags switched on.
for i in range(0, 10):
    env = virl.Epidemic(stochastic=True, noisy=True, problem_id=i)
    agent = RandomAgent(env, actions)
    states, rewards = run(agent)
    title = ", Problem id=" + str(i)
    agent_plot(agent, states, rewards, title)

## Linear Policy

In [None]:
# A bare %cd changes to the home directory; the second %cd then enters the
# folder that holds the saved linear-approximator CSV files.
# NOTE(review): this hard-codes a checkout located directly under the home
# directory and uses Windows-style backslashes — confirm before running on
# another machine or OS.
%cd
%cd ai-coursework-group6\notebooks\linear_aprx_output
%pwd

# Evaluate the linear policy for problem ids 0..9.
for i in range(0,10):
    title = "Linear Policy Evaluate: problem id " + str(i) + " "
    
    # Saved values for problem id i, read from CSV.
    # NOTE(review): none of these four arrays is used afterwards — `intercept`
    # and `coeff` are never read, and `state_transformed` / `q_value` are
    # overwritten by the evaluate() result below. Presumably they were meant
    # to rebuild the agent; confirm.
    intercept = np.genfromtxt("interceptP" + str(i) + ".csv", delimiter=',')
    coeff = np.genfromtxt("coeffP" + str(i) + ".csv", delimiter=',')
    state_transformed = np.genfromtxt("state_transformedP" + str(i) + ".csv", delimiter=',')
    q_value = np.genfromtxt("q_valueP" + str(i) + ".csv", delimiter=',')
    
    # NOTE(review): `agent` is not created in this cell; it is whatever object
    # an earlier cell left bound to that name (the Random section's agent when
    # the notebook is run top to bottom), so this likely does not evaluate a
    # linear policy — confirm and construct the intended agent here.
    states,all_rewards, all_total_rewards,func_approximator, state_transformed, q_value = agent.evaluate()
    eval_plot(states,agent, all_total_rewards, all_rewards,title)
   

In [None]:
# Evaluate the linear policy for the four stochastic/noisy combinations.
for i in range(0, 4):
    print(stochastic_noisy_print[i])
    # Index the label dict: concatenating the dict itself to a str raises
    # TypeError.
    title = "Linear Policy Evaluate:" + stochastic_noisy_print[i]

    # NOTE(review): these are the same "...P<i>.csv" files the problem-id cell
    # reads, and none of the four arrays is used afterwards (`state_transformed`
    # and `q_value` are overwritten by the evaluate() result) — confirm which
    # files belong to the stochastic/noisy runs.
    intercept = np.genfromtxt("interceptP" + str(i) + ".csv", delimiter=',')
    coeff = np.genfromtxt("coeffP" + str(i) + ".csv", delimiter=',')
    state_transformed = np.genfromtxt("state_transformedP" + str(i) + ".csv", delimiter=',')
    q_value = np.genfromtxt("q_valueP" + str(i) + ".csv", delimiter=',')

    # NOTE(review): `agent` is not created in this cell; it is whatever an
    # earlier cell left bound to that name — confirm the intended agent.
    states, all_rewards, all_total_rewards, func_approximator, state_transformed, q_value = agent.evaluate()
    eval_plot(states, agent, all_total_rewards, all_rewards, title)

## Tabular Policy

In [None]:
%cd
%cd ai-coursework-group6\notebooks
%pwd

for i in range(0,10):
    title = "Q Learning Evaluate: problem id " + str(i) + " "
    
    with open('qlearningP' + str(i) + '.pkl', 'rb') as inpt:
        agent = pickle.load(inpt)
        states,all_rewards, all_total_rewards = agent.evaluate()
        eval_plot(states,agent, all_total_rewards, all_rewards,title)
        mean_std(all_total_rewards,all_rewards,title)
        
        


In [None]:

# Evaluate the pickled Q-learning agent for the four stochastic/noisy
# combinations.
for i in range(0, 4):
    print(stochastic_noisy_print[i])
    # Index the label dict: concatenating the dict itself to a str raises
    # TypeError.
    title = "Q Learning Evaluate:" + stochastic_noisy_print[i]

    # SECURITY: pickle.load can execute arbitrary code stored in the file;
    # only load pickles produced by this project.
    with open('qlearningS' + str(i) + '.pkl', 'rb') as inpt:
        agent = pickle.load(inpt)
    states, all_rewards, all_total_rewards = agent.evaluate()
    eval_plot(states, agent, all_total_rewards, all_rewards, title)
    mean_std(all_total_rewards, all_rewards, title)

## Q learning

In [None]:
from q_learning_tabular.q_table import QTable
from q_learning_tabular.q_learning_agent import QLearningAgent

In [None]:
def evaluate_q_learning_tabular(stochastic, noisy):
    """Evaluate the saved tabular Q-tables for problem ids 0 through 9.

    For each problem id the matching Q-table text file is loaded, an agent is
    built around it on a fresh environment, ``agent.evaluate(10)`` is called
    and its result is plotted.

    Args:
        stochastic: forwarded to ``virl.Epidemic(stochastic=...)``.
        noisy: forwarded to ``virl.Epidemic(noisy=...)``.
    """
    for problem_id in range(10):
        # Q-table files are addressed by bare filename.
        q_table_path = f"q_learning_tabular_problem_id_{problem_id}.txt"
        stored_q_table = QTable.load_raw_q_table_from_file(q_table_path)

        epidemic = virl.Epidemic(stochastic=stochastic, noisy=noisy, problem_id=problem_id)
        tabular_agent = QLearningAgent(
            epidemic,
            print_out_every_x_episodes=1,
            internal_q_table=stored_q_table,
        )
        evaluation_totals = tabular_agent.evaluate(10)

        plot(evaluation_totals, f"Q Learning Tabular with problem id {problem_id}")

In [None]:
# Evaluate the saved tabular Q-tables with both the stochastic and the noisy
# flag switched off.
# NOTE(review): the Q-table files are addressed by bare filename, so they
# presumably must sit in the current working directory; an earlier draft
# changed into "ai-coursework-group6" at this point — confirm where the files
# live before relying on this cell.

evaluate_q_learning_tabular(stochastic=False, noisy=False)

## Q learning Neural Network

In [None]:
from q_learning_nn.nn_function_approximator import NNFunctionApproximatorJointKeras, init_networks, load_trained_network
from q_learning_nn.memory import ReplayMemory, Transition
from q_learning_nn.agent import Agent
from q_learning_nn.strategy import Strategy
from q_learning_nn.run import qlearning_nn

# File name of the saved network weights.
# NOTE(review): the variable name says this policy was trained on problem id
# 0, while the file is called "policy_network2.h5" — confirm it is the
# intended checkpoint.
policy_trained_using_problem_id_zero = "policy_network2.h5"
# The loader's result is unpacked into two networks; both are passed to
# qlearning_nn in the evaluation cell that follows.
policy_network_new, target_network_new = load_trained_network(policy_trained_using_problem_id_zero)

In [None]:
# Run the loaded networks for one episode on each of problem ids 0..9, on an
# environment with the stochastic and noisy flags off.
# NOTE(review): learning_rate=0.0 presumably keeps the weights from changing
# during these runs — confirm against the Agent implementation.
nn_rewards_by_problem = {}
for i in range(10):
    agent = Agent(virl.Epidemic(stochastic=False, noisy=False, problem_id=i), learning_rate=0.0)
    rewards = qlearning_nn(
        agent=agent, policy_network=policy_network_new, target_network=target_network_new, num_episodes=1
    )
    # `rewards` is overwritten on every iteration, so keep each problem's
    # result under its id for later inspection or plotting.
    nn_rewards_by_problem[i] = rewards