In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd

In [4]:
from environments.easy21 import EasyAction, EasyState, EasyEnvironment

In [5]:
# for plotting
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# visualize plots in the jupyter notebook
# check more https://goo.gl/U3Ai8R
%matplotlib inline

In [6]:
def plot_value_function(V, title='Value Function', generate_gif=False, train_steps=None):
    """
    Plots a value function as a surface plot, like in: https://goo.gl/aF2doj

    You can choose between just plotting the graph for the value function
    which is the default behaviour (generate_gif=False) or to train the agent
    a couple of times and save the frames in a gif as you train.

    Args:
        agent: An agent.
        title (string): Plot title.
        generate_gif (boolean): If want to save plots as a gif.
        train_steps: If is not None and generate_gif = True, then will use this
                     value as the number of steps to train the model at each frame.
    """
    # you can change this values to change the size of the graph
    fig = plt.figure(title, figsize=(10, 5))
    
    # explanation about this line: https://goo.gl/LH5E7i
    ax = fig.add_subplot(111, projection='3d')
    
    def plot_frame(ax):
        # min value allowed accordingly with the documentation is 1
        # we're getting the max value from V dimensions
        min_x = 1
        max_x = V.shape[0]
        min_y = 1
        max_y = V.shape[1]

        # creates a sequence from min to max
        x_range = np.arange(min_x, max_x)
        y_range = np.arange(min_y, max_y)

        # creates a grid representation of x_range and y_range
        X, Y = np.meshgrid(x_range, y_range)

        # get value function for X and Y values
        def get_stat_val(x, y):
            return V[x, y]
        Z = get_stat_val(X, Y)

        # creates a surface to be ploted
        # check documentation for details: https://goo.gl/etEhPP
        ax.set_xlabel('Dealer Showing')
        ax.set_ylabel('Player Sum')
        ax.set_zlabel('Value')
        return ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, 
                               linewidth=0, antialiased=False)

    surf = plot_frame(ax)
    plt.title(title)
    fig.canvas.draw()
    plt.show()

## Monte-carlo control

In [9]:
from environments.easy21 import DealerAgent, MCAgent, EpsilonGreedyPolicy

In [13]:
episodes = 10000

rewards = []

env = EasyEnvironment()

agent = MCAgent(
    policy=EpsilonGreedyPolicy(N_zero=100)
)

for i in range(episodes):
    
    if i % (round(episodes / 20)) == 0:
        print(f"Episode: {i:>10}/{episodes} --- {i/episodes*100:>5.1f}%")
    
    env.reset()
    s = env.get_state()
    print(s.key)
    
    while not s.terminal:

        # Take step, first agent using current state, then environment using action
        a = agent.step(s)
        s, G = env.step(a)
        
        rewards.append(G)
    
    agent.optimize(G)

Episode:          0/10000 ---   0.0%
Field(name=None,type=None,default=<dataclasses._MISSING_TYPE object at 0x10a4b2be0>,default_factory=<function EasyState.<lambda> at 0x11934d940>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=None)-Field(name=None,type=None,default=<dataclasses._MISSING_TYPE object at 0x10a4b2be0>,default_factory=<function EasyState.<lambda> at 0x11934da60>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=None)-True


NameError: name 'G' is not defined

In [None]:
df = pd.DataFrame({"key": Q.r.keys(), "q": Q.r.values()})
df[["Dealer showing", "Player sum", "terminated", "action"]] = df["key"].str.split("-", expand=True).iloc[:, :4]
df = df[["Dealer showing", "Player sum", "q"]]
df = df.groupby(["Dealer showing", "Player sum"]).max()
df

In [None]:
v = df.unstack()
v = v.reset_index(drop=True)
v

In [None]:
plot_value_function(v.values)

In [None]:
v_columns = v.columns.to_list()

x_linspace = np.array(list(zip(*v_columns))[1], dtype=int)
y_linspace = v.index.to_numpy()

x, y = np.meshgrid(x_linspace, y_linspace)

In [None]:
fig = plt.figure(figsize=(10, 5))
    
ax = fig.add_subplot(111, projection='3d')

# Plot the surface.
surf = ax.plot_surface(x, y, v.values, 
                       cmap=cm.coolwarm,
                       rstride=1, cstride=1,
                       linewidth=0, antialiased=False)

ax.set_xlabel("Dealer showing")
ax.set_ylabel("Player sum")

plt.show()

In [None]:
print(x_linspace.shape, y_linspace.shape)
x_linspace, y_linspace

In [None]:
x, y = np.meshgrid(x_linspace, y_linspace)
print(x.shape, y.shape, v.transpose().values.shape)

In [None]:
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})


# Plot the surface.
surf = ax.plot_surface(x, y, v.transpose().values, 
                       cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()