## Use of VFA methods
The goal of this notebook is to show whether value function approximation (VFA) methods can be used in the SlimeVolley Gym environment.

In [1]:
from os.path import join
from slime import Slime
import pandas as pd
import numpy as np
import gym
import slimevolleygym
from algorithms import *

# Folder holding the input data files, given relative to the notebook's working directory
DATADIR = join('..', 'data')

We load the CSV file in which we saved our centroids. In this case, we will use the 50K file.

In [2]:
# Build the path to the centroid file first, then load it as a DataFrame.
centroids_path = join(DATADIR, 'c_50_mix.csv')
centroids = pd.read_csv(centroids_path)

We build the function that computes the feature vector, which is encapsulated in a `Featurizer` class.

In [3]:
# Instantiate the registered SlimeVolley environment used for training below.
env_id = 'SlimeVolley-v0'
slime_env = gym.make(env_id)

In [4]:
# Convert the centroid DataFrame to a plain NumPy array and hand it, together
# with the environment's observation space, to the Featurizer.
centroid_array = np.array(centroids)
featurizer = Featurizer(slime_env.observation_space, centroid_array)

### 50K centroids

In [None]:
# Action-value function over the featurized observations, one output per action.
# NOTE(review): the third argument (0.5) is presumably the trace-decay lambda - confirm.
n_actions = slime_env.action_space.n
q = LambdaValueFunction(featurizer, n_actions, 0.5)

# Train with SARSA(lambda). epsilon is passed explicitly; per the original note,
# alpha is left at its default value.
# NOTE(review): 20000 / 1_000_000 look like episode and step budgets - confirm against sarsa_lambda.
sarsa_result = sarsa_lambda(slime_env, q, 20000, 1_000_000, epsilon=0.01)
slime_q, slime_history, slime_history_average = sarsa_result

In [None]:
# Plot the two history series returned by the sarsa_lambda run.
plot_history(slime_history, slime_history_average)

In [None]:
# Step sizes passed to reinforce_baseline.
# NOTE(review): presumably alpha is for the policy and beta for the baseline - confirm.
alpha, beta = 0.001, 0.005

# Baseline value function and policy estimator, both built on the same featurizer.
v = ValueFunctionRB(featurizer)
n_policy_actions = slime_env.action_space.n
pi = PolicyEstimator(featurizer, n_policy_actions)

# Train with REINFORCE plus baseline, then unpack the policy and the two history series.
reinforce_result = reinforce_baseline(
    slime_env, pi, v, 20000, 1_000_000, alpha, beta, gamma=0.98
)
slime_pi, slime_history_b, slime_average_history_b = reinforce_result

In [None]:
# Plot the two history series returned by the reinforce_baseline run.
plot_history(slime_history_b, slime_average_history_b)

In [None]:
# Roll out one rendered episode with the learned policy.
# S is the current observation and G accumulates the episode return.
env = gym.make('SlimeVolley-v0')
S = env.reset()
G = 0
while True:
    # NOTE(review): assumes slime_pi can be indexed by an observation - confirm
    # against what reinforce_baseline actually returns.
    action = slime_pi[S]
    obs, reward, done, info = env.step(action)
    env.render()
    # Advance the state so the next action is chosen from the new observation,
    # not from the initial one.
    S = obs
    G += reward
    if done:
        # Stop at the end of the episode instead of stepping a finished
        # environment forever, and release the render window.
        env.close()
        break