In [None]:
# Use the full browser window width for the notebook container.
# `display` and `HTML` are taken from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import os
import numpy as np

def get_current_dir():
    """Return the name of the current working directory.

    Only the final path component is returned (e.g. ``"b"`` for ``/a/b``).
    ``os.path.basename`` is used instead of splitting on ``"/"`` so the
    result is also correct on platforms whose path separator is not ``"/"``.
    """
    return os.path.basename(os.getcwd())

def in_project_root_directory():
    """Tell whether the working directory is named like the project root.

    Returns:
        bool: True when the name reported by ``get_current_dir()`` is
        exactly ``"ai-coursework-group6"``, False otherwise.
    """
    project_root_name = "ai-coursework-group6"
    return project_root_name == get_current_dir()

# Step up one directory when the working directory is not named like the
# project root. Once the working directory has the root's name, re-running
# this cell is a no-op.
# NOTE(review): presumably the notebook lives exactly one level below the
# root, and the `virl` import and relative `trained_agents/...` paths used
# later rely on this — confirm.
if not in_project_root_directory():
    os.chdir('..')

import virl
from matplotlib import pyplot as plt

In [None]:
import pandas as pd
from IPython import display

def smooth_plot(all_rewards, smoothed_rewards, title):
    """Plot raw rewards together with their smoothed counterpart.

    Figure number 2 is reused and cleared on every call, so calling this
    function repeatedly redraws the same figure instead of opening new ones.

    Args:
        all_rewards: values drawn as a dashed, semi-transparent line.
        smoothed_rewards: values drawn as a solid line on the same axes.
        title: text used as the figure title.
    """
    plt.figure(2, figsize=(12, 6))
    plt.clf()
    plt.title(title)
    plt.xlabel("Episode")  # axis label was previously misspelled "Epsiode"
    plt.ylabel("Total Reward")
    plt.plot(all_rewards, '--', alpha=0.5)
    plt.plot(smoothed_rewards)
    plt.legend(["Rewards", "Rewards (Smoothed)"])
    # A very short pause gives the plotting backend a chance to draw the
    # updated figure before control returns to the caller.
    plt.pause(0.0001)

In [None]:
# Let's import basic tools for defining the function and doing the gradient-based learning
import sklearn.pipeline
import sklearn.preprocessing
from sklearn.linear_model import SGDRegressor # this defines the SGD function
from sklearn.kernel_approximation import RBFSampler # this is the RBF function transformation method

In [None]:
from LinearAprxAgent import LinearAprxAgent

### Training

In [None]:
from plot_helper import plot
import os

In [None]:
# Train one agent on each problem id and save its parameters for evaluation.
output_dir = "trained_agents/linear_policy"
# np.savetxt does not create missing directories; make sure the target exists
# *before* training so a long run is not lost to a failed save at the end.
os.makedirs(output_dir, exist_ok=True)

for i in range(0,10):
    env = virl.Epidemic(stochastic=False, noisy=False,problem_id=i)
    agent = LinearAprxAgent(env)

    title = "Train: problem id " + str(i) + " "
    print(title)

    states,all_rewards, all_total_rewards,func_approximator, state_transformed, q_value = agent.train()
    intercept = func_approximator.intercept_
    coeff = func_approximator.coef_

    # Save for evaluation: one CSV per artifact, suffixed "P<problem id>".
    artifacts = {
        "intercept": intercept,
        "coeff": coeff,
        "state_transformed": state_transformed,
        "q_value": q_value,
        # np.savetxt rejects 0-d input; atleast_1d keeps 1-D/2-D data unchanged
        # and makes a scalar last reward saveable.
        "lastReward": np.atleast_1d(all_rewards[-1]),
    }
    for name, values in artifacts.items():
        np.savetxt(os.path.join(output_dir, name + "P" + str(i) + ".csv"), values, delimiter=",")

In [None]:
# Train one agent per stochastic/noisy environment configuration and save its
# parameters for evaluation. Files from this cell use the suffix "S<index>".
stochastic_noisy = {
    0: virl.Epidemic(stochastic=False, noisy=False),
    1: virl.Epidemic(stochastic=True, noisy=False),
    2: virl.Epidemic(stochastic=False, noisy=True),
    3: virl.Epidemic(stochastic=True, noisy=True),
}
stochastic_noisy_print = {
    0: "stochastic=False, noisy=False",
    1: "stochastic=True, noisy=False",
    2: "stochastic=False, noisy=True",
    3: "stochastic=True, noisy=True",
}

output_dir = "trained_agents/linear_policy"
# np.savetxt does not create missing directories; make sure the target exists
# *before* training so a long run is not lost to a failed save at the end.
os.makedirs(output_dir, exist_ok=True)

for i in range(0,4):

    env = stochastic_noisy[i]
    print(stochastic_noisy_print[i])
    agent = LinearAprxAgent(env)

    states,all_rewards, all_total_rewards,func_approximator, state_transformed, q_value = agent.train()
    intercept = func_approximator.intercept_
    coeff = func_approximator.coef_

    # Save for evaluation.
    artifacts = {
        "intercept": intercept,
        "coeff": coeff,
        "state_transformed": state_transformed,
        "q_value": q_value,
        # Previously written as "lastRewardP<i>", which silently overwrote the
        # per-problem-id files P0..P3; it now uses the "S" suffix like the
        # other artifacts of this cell.
        # np.savetxt rejects 0-d input; atleast_1d keeps 1-D/2-D data unchanged
        # and makes a scalar last reward saveable.
        "lastReward": np.atleast_1d(all_rewards[-1]),
    }
    for name, values in artifacts.items():
        np.savetxt(os.path.join(output_dir, name + "S" + str(i) + ".csv"), values, delimiter=",")

In [None]:
# Summary statistics of the rewards returned by training.
# NOTE: `all_total_rewards` and `all_rewards` are whatever the most recently
# executed training loop left behind, i.e. the values from its final iteration
# only; when the notebook is run top to bottom that is the last
# stochastic/noisy configuration.
total_reward_sd = np.std(all_total_rewards)
total_reward_mean = np.mean(all_total_rewards)
reward_sd = np.std(all_rewards)
reward_mean = np.mean(all_rewards)

print("Total reward standard deviation = " + str(total_reward_sd))
print("Total reward mean = " + str(total_reward_mean))
print("Reward standard deviation = " + str(reward_sd))
# Label fixed: this line reports `reward_mean`, not the total reward mean.
print("Reward mean = " + str(reward_mean))