## Import Dependencies

In [5]:
# gym stuff
import gym
import gym_anytrading

# stable baselines
from stable_baselines import A2C # algorithm
from stable_baselines.common.vec_env import DummyVecEnv

# processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

ModuleNotFoundError: No module named 'tensorflow'

Gym is a standard API for reinforcement learning, and a diverse collection of reference environments.

- The Gym interface is simple, pythonic, and capable of representing general RL problems.
- gym-anytrading is a package of custom OpenAI Gym trading environments that you can use to simulate trading a range of instruments: stocks, forex, cryptocurrencies, equities, and securities.

## Read Data

In [None]:
# Load the data set stored at source/gme.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer="source/gme.csv")

In [None]:
# Preview the first five rows to sanity-check what was loaded.
df.head(n=5)

In [None]:
# Convert the "Date" column to datetime values, then display the dtype of
# every column to confirm the conversion took effect.
df["Date"] = pd.to_datetime(df["Date"])
df.dtypes  # fixed typo: `dytypes` is not a DataFrame attribute (AttributeError)

In [None]:
# Promote the "Date" column to the row index, mutating df in place,
# and preview the re-indexed frame.
df.set_index(keys="Date", inplace=True)
df.head(n=5)

In [None]:
# Build the "stocks-v0" trading environment on top of df.
# NOTE(review): per gym-anytrading, frame_bound presumably selects the
# (start, end) row range of df and window_size the look-back length of
# each observation -- confirm against the package documentation.
env = gym.make(
    "stocks-v0",
    df=df,
    window_size=5,
    frame_bound=(10, 100),
)

In [None]:
# Display the entry at index 5 of the environment's signal features.
env.signal_features[5]

## Build Environment

In [None]:
# Display the environment's action space (the set of valid actions).
env.action_space

In [None]:
# Roll out one full episode with uniformly sampled (random) actions to see
# how the untrained environment behaves.
state = env.reset()

done = False
while not done:
    action = env.action_space.sample()
    n_state, reward, done, info = env.step(action)

# The info dict from the final step summarises the finished episode.
print("info", info)

# Plot the whole episode on a fresh, cleared figure.
plt.figure(figsize=(15, 6))
plt.cla()
env.render_all()
plt.show()

- action_space: Set of valid actions at this state
- step: Takes the specified action and returns updated information gathered from the environment, such as the observation, the reward, whether the goal has been reached, and miscellaneous info useful for debugging.


- observation: specific to the environment; for example, in Mountain-Car it returns the car's position and velocity, which are required for building the momentum needed to reach the goal. In some cases, it will be raw pixel data.
- reward: the amount achieved by the last action. By default, the goal is to maximize the reward (of course!).
- done: this is really useful. It tells us when the episode is over, for example because the agent has achieved the goal.
- info: emits debugging information that is useful when something goes wrong and you have to figure out exactly what the agent is doing.

## Train

In [None]:
def env_maker():
    """Return a fresh "stocks-v0" environment built on df for training."""
    return gym.make("stocks-v0", df=df, frame_bound=(5, 100), window_size=5)


# DummyVecEnv takes a list of environment factories; wrap the single one.
env = DummyVecEnv([env_maker])

In [None]:
# Create an A2C agent with the "MlpLstmPolicy" policy on the vectorised
# environment, then train it for one million timesteps.
model0 = A2C(
    policy="MlpLstmPolicy",
    env=env,
    verbose=1,
)
model0.learn(total_timesteps=1000000)

In [None]:
# ACER is not imported in the notebook's import cell, so bring it into
# scope here; without this the cell fails with NameError.
from stable_baselines import ACER

# Create an ACER agent with the "MlpLstmPolicy" policy on the vectorised
# environment, then train it for one million timesteps.
model1 = ACER("MlpLstmPolicy", env, verbose=1)
model1.learn(total_timesteps=1000000)

## Evaluation

### A2C Algorithm

In [None]:
# Evaluate the trained A2C agent on a separate frame of the data.
env = gym.make("stocks-v0", df=df, frame_bound=(90,110), window_size=5)
# reset() takes no positional argument; the original `env.reset(0)` raises
# TypeError.
obs = env.reset()

# The model uses a recurrent (LSTM) policy, so the hidden state returned by
# predict() must be fed back in on the next step; otherwise every prediction
# starts from the initial state. None makes predict() use that initial state.
lstm_state = None

while True:
    # predict() expects a leading batch (n_envs) axis on the observation.
    obs = obs[np.newaxis, ...]
    action, lstm_state = model0.predict(obs, state=lstm_state)
    obs, rewards, done, info = env.step(action)

    if done:
        print("info", info)
        break

In [None]:
# Plot the evaluation episode on a fresh 15x6 figure.
plt.figure(figsize=(15,6))
# Clear the current axes before drawing.
plt.cla()
# NOTE(review): render_all() presumably draws the whole episode onto the
# current axes -- confirm against the gym-anytrading documentation.
env.render_all()
plt.show()

### ACER Algorithm

In [None]:
# Evaluate the trained ACER agent on a separate frame of the data.
env = gym.make("stocks-v0", df=df, frame_bound=(90,110), window_size=5)
# reset() takes no positional argument; the original `env.reset(0)` raises
# TypeError.
obs = env.reset()

# The model uses a recurrent (LSTM) policy, so the hidden state returned by
# predict() must be fed back in on the next step; otherwise every prediction
# starts from the initial state. None makes predict() use that initial state.
lstm_state = None

while True:
    # predict() expects a leading batch (n_envs) axis on the observation.
    obs = obs[np.newaxis, ...]
    action, lstm_state = model1.predict(obs, state=lstm_state)
    obs, rewards, done, info = env.step(action)

    if done:
        print("info", info)
        break

In [None]:
# Plot the evaluation episode on a fresh 15x6 figure.
plt.figure(figsize=(15,6))
# Clear the current axes before drawing.
plt.cla()
# NOTE(review): render_all() presumably draws the whole episode onto the
# current axes -- confirm against the gym-anytrading documentation.
env.render_all()
plt.show()