# Assignment

In [5]:
# Import 

import numpy as np
import matplotlib.pyplot as plt

# import from files
from degree_freedom_queen import *
from degree_freedom_king1 import *
from degree_freedom_king2 import *
from generate_game import *
from Chess_env import *

from neural_net import *
from helper_functions import *

## The Environment

You can find the environment in the file Chess_env, which contains the class Chess_Env. To create an environment object, pass the board size as input; in our example, size_board=4.
Chess_Env provides the following methods:

1. Initialise_game. The method initialises an episode by placing the three pieces considered (Agent's king and queen, enemy's king) in the chess board. The outputs of the method are described below in order.

     - S $\;$ A matrix representing the board, where each entry is one of four numbers: 0, no piece in that position; 1, location of the 
     agent's king; 2, location of the queen; 3, location of the enemy king.
     
     - X $\;$ The features, that is, the input to the neural network. See the assignment for more information regarding the definition of the features adopted. To customise them, edit the Features method of the class Chess_Env.
     
     - allowed_a $\;$ The actions that the agent is allowed to make. The agent moves a king, with a total of 8 possible actions, and a queen, with a total of $(board_{size}-1)\times 8$ actions. The total number of possible actions corresponds to the sum of the two, but not all actions are allowed in a given position (movements to locations outside the borders or against chess rules). Thus, the variable allowed_a is a vector that is one (zero) for an action that the agent can (can't) make. Be careful to apply the policy only to the actions that are allowed.
     

2. OneStep. The method performs a one-step update of the system. Given the action selected by the agent as input, it updates the chess board by performing that action and then the response of the enemy king (which is a random allowed action in the settings considered). The first three outputs are the same as for the Initialise_game method, but they are computed for the position reached after the update of the system. The fourth and fifth outputs are:

     - R $\;$ The reward. To change this, look at the OneStep method of the class where the rewards are set.
     
     - Done $\;$ A variable that is 1 if the episode has ended (checkmate or draw).
     
     
3. Features. Given the chessboard position, the method computes the features.

This information and a quick look at the class should be all you need to get going. The other functions that the class uses are uncommented and constitute an example of how not to write Python code. You can take a look at them if you want, but it is not necessary.






In [6]:
## INITIALISE THE ENVIRONMENT

# Board size handed to the environment constructor.
size_board = 4
# Build the chess environment for the chosen board size.
env=Chess_Env(size_board)

# Initialize game and set fixed parameters

In [7]:
# INITIALISE THE PARAMETERS OF YOUR NEURAL NETWORK AND...
# PLEASE CONSIDER TO USE A MASK OF ONE FOR THE ACTION MADE AND ZERO OTHERWISE IF YOU ARE NOT USING VANILLA GRADIENT DESCENT...
# WE SUGGEST A NETWORK WITH ONE HIDDEN LAYER WITH SIZE 200. 


# Start one episode to obtain a sample board (S), feature vector (X) and
# allowed-action vector; only the sizes of the last two are used in this cell.
S,X,allowed_a=env.Initialise_game()

N_a=np.shape(allowed_a)[0]   # TOTAL NUMBER OF POSSIBLE ACTIONS
N_in=np.shape(X)[0]    ## INPUT SIZE

# Load Trained Models

In [8]:
# Load the three trained agents; all share the same activation and run tag.
_activation = "relu"
_run_tag = "seed_21"

qlearning = load_from("qlearning", _activation, None, name_extension=_run_tag)
sarsa = load_from("sarsa", _activation, None, name_extension=_run_tag)
dqn = load_from("dqn", _activation, None, name_extension=_run_tag)

loading from: models/qlearning_relu_None_seed_21


AttributeError: 'builtin_function_or_method' object has no attribute 'randn'

In [None]:
# Overlay the smoothed per-episode reward of the three loaded agents.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
for _agent, _legend_name in ((qlearning, "Q-Learning"), (sarsa, "SARSA"), (dqn, "DQN")):
    # Element 0 of the exponential_moving_average result is the curve to draw.
    _smoothed = exponential_moving_average(_agent.R_history)[0]
    ax.plot(_smoothed, label=_legend_name)
ax.set(xlabel="Episodes", ylabel="Reward", title="EMA of Rewards per Episode")
# Optional zoom, left disabled: ax.set_ylim(.75, 1)
ax.legend()
# Write the figure to disk before displaying it.
plt.savefig("../figures/ema_rewards_per_episode_comparison.png", bbox_inches='tight')
plt.show()

In [None]:
# Overlay the smoothed per-episode move count of the three loaded agents.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
for _agent, _legend_name in ((qlearning, "Q-Learning"), (sarsa, "SARSA"), (dqn, "DQN")):
    # Element 0 of the exponential_moving_average result is the curve to draw.
    _smoothed = exponential_moving_average(_agent.N_moves_history)[0]
    ax.plot(_smoothed, label=_legend_name)
ax.set(xlabel="Episodes", ylabel="Number of moves", title="EMA of Number of Moves per Episode")
# Optional zoom, left disabled: ax.set_ylim(0, 18)
ax.legend()
# Write the figure to disk before displaying it.
plt.savefig("../figures/ema_number_of_moves_per_episode_comparison.png", bbox_inches='tight')
plt.show()

# Analyze Averaged Statistics over Multiple Runs (without Seed)

In [None]:
def plot_avg_rewards(method):
    """Plot the smoothed mean reward per episode with a +/- 1 std band.

    Loads the averaged statistics for ``method``, smooths the reward mean and
    standard deviation with ``exponential_moving_average`` and shows the
    resulting figure.

    Args:
        method: Identifier forwarded to ``load_avg_statistics``
            (e.g. ``"qlearning"``).
    """
    # Only the reward statistics are needed here; the move statistics are ignored.
    R_mean, R_std, _, _ = load_avg_statistics(method)

    # Smooth each series once and reuse the result for both the line and the band.
    ema = exponential_moving_average(R_mean)[0]
    std_ema = exponential_moving_average(R_std)[0]

    # The band is clipped to [0, 1] -- presumably the range of the reward; confirm.
    lower_bound = np.maximum(ema - std_ema, 0)
    upper_bound = np.minimum(ema + std_ema, 1)

    # Derive the x-axis from the plotted series so line and band always align.
    episodes = np.arange(len(ema))

    fig, ax = plt.subplots(1, 1, figsize=(6, 3))

    ax.plot(episodes, ema, label="Average Reward")
    ax.fill_between(episodes, lower_bound, upper_bound, alpha=0.2, label="1 Sigma Confidence Interval")
    ax.set_xlabel("Episodes")
    ax.set_ylabel("Reward")
    ax.legend()
    plt.show()

def plot_avg_n_moves(method):
    """Plot the smoothed mean number of moves per episode with a +/- 1 std band.

    Loads the averaged statistics for ``method``, smooths the move-count mean
    and standard deviation with ``exponential_moving_average`` and shows the
    resulting figure.

    Args:
        method: Identifier forwarded to ``load_avg_statistics``
            (e.g. ``"qlearning"``).
    """
    # Only the move statistics are needed here; the reward statistics are ignored.
    _, _, N_moves_mean, N_moves_std = load_avg_statistics(method)

    # Smooth each series once and reuse the result for both the line and the band.
    ema = exponential_moving_average(N_moves_mean)[0]
    std_ema = exponential_moving_average(N_moves_std)[0]

    # Derive the x-axis from the plotted series so line and band always align.
    episodes = np.arange(len(ema))

    # A single axes is enough: the previous 1x2 grid left one panel empty
    # because everything was drawn through the implicit pyplot axes.
    fig, ax = plt.subplots(1, 1, figsize=(6, 3))

    ax.plot(episodes, ema, label="Average Number of Moves")
    # The band is mean +/- one standard deviation, so it is labelled as 1 sigma
    # (not a 95% interval).
    ax.fill_between(episodes, ema - std_ema, ema + std_ema, alpha=0.2, label="1 Sigma Confidence Interval")
    ax.set_xlabel("Episodes")
    ax.set_ylabel("Number of moves")
    ax.legend()
    plt.show()

# Show the averaged reward curve for the "qlearning" statistics.
plot_avg_rewards("qlearning")

In [None]:
# R_mean is only a local inside the plotting helpers, so it must be loaded here
# before it can be plotted at notebook scope (the first returned item is the
# mean reward series).
R_mean = load_avg_statistics("qlearning")[0]

# Raw (unsmoothed) mean reward per episode.
plt.plot(R_mean, label="Q-Learning")
plt.legend()
plt.show()