# Development Notebook
## Author: Daniel Mallia
This notebook is used for quick, interactive development and demonstration.

### Part 1 - Environment
See the Easy21Environment module for implementation

In [None]:
import matplotlib.pyplot as plt
from Easy21Environment import *
from Easy21Agents import *
# Seed value handed to every environment and agent constructor in the cells below.
SEED = 1234

In [None]:
# Environment instance built from SEED; the Monte Carlo and TD cells below both train against it.
easy = Easy21Environment(SEED)

### Part 2 - Monte Carlo Control
See the Easy21Agents module for implementation

In [None]:
# Build a Monte Carlo agent from SEED and train it against the `easy` environment.
# NOTE(review): the second argument (1000000) is presumably the number of
# episodes -- confirm against Easy21MC.learn.
mc = Easy21MC(SEED)
mc.learn(easy,1000000)
# Keep whatever plot_value_function returns in `fig`
# (presumably a matplotlib figure -- confirm against Easy21Agents).
fig = mc.plot_value_function()

### Part 3 - TD Learning

In [None]:
# Build a TD agent from SEED and train it against the `easy` environment.
# The third positional argument is the slot where the comparison cell below
# passes td_lambda, so this run uses lambda = 0.
# NOTE(review): the second argument (10000) is presumably the number of
# episodes -- confirm against Easy21TD.learn.
td = Easy21TD(SEED)
td.learn(easy,10000, 0)
# Keep whatever plot_value_function returns in `fig`
# (presumably a matplotlib figure -- confirm against Easy21Agents).
fig = td.plot_value_function()

In [None]:
# Compare TD agents trained with different lambda values against one Monte
# Carlo agent used as the reference.
#   error_dict:       lambda value -> result of compare_q_estimates
#   episode_mse_dict: run index    -> value returned by td_compare.learn
error_dict, episode_mse_dict = {}, {}

# Reference agent: trained once, on its own environment built from SEED.
easy_compare = Easy21Environment(SEED)
mc_compare = Easy21MC(SEED)
mc_compare.learn(easy_compare, 1000000)

# One TD run per lambda = 0.1 * i for i in 0..10; every run gets a fresh
# environment and a fresh agent, both built from the same SEED.
for index, td_lambda in enumerate([0.1 * i for i in range(11)]):
    easy_compare = Easy21Environment(SEED)
    td_compare = Easy21TD(SEED)
    episode_mse_dict[index] = td_compare.learn(
        easy_compare,
        1000,
        td_lambda,
        mc_comparison=mc_compare,
    )
    error_dict[td_lambda] = compare_q_estimates(td_compare, mc_compare)

# Leave `index` equal to the number of runs, so that index - 1 is the last
# run (lambda = 1).
index = len(episode_mse_dict)

# Figure 1: comparison result for each lambda.
fig, ax = plt.subplots()
ax.plot(list(error_dict), list(error_dict.values()))
ax.set_xlabel("Lambda")
ax.set_ylabel("Mean Squared Error")
ax.set_title("MSE between TD and MC estimates, varying lambda")
plt.show()

# Figure 2: the per-run values returned by learn for the first run
# (lambda = 0) and the last run (lambda = 1).
fig, ax = plt.subplots()
ax.plot(
    list(episode_mse_dict[0].keys()),
    list(episode_mse_dict[0].values()),
    color="blue",
    label="Lambda = 0",
)
ax.plot(
    list(episode_mse_dict[index - 1].keys()),
    list(episode_mse_dict[index - 1].values()),
    color="red",
    label="Lambda = 1",
)
ax.legend()
ax.set_xlabel("Episode")
ax.set_ylabel("Mean Squared Error")
ax.set_title("MSE between TD and MC estimates, over episodes")
plt.show()

### Part 4 - Linear Function Approximation