# 0. Install and Import dependencies

In [None]:
# !pip install tensorflow-gpu==1.15.0 tensorflow==1.15.0 stable-baselines gym-anytrading gym

In [2]:
# Gym stuff
import gymnasium as gym
import gym_anytrading # brings in template environments

# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C

# Processing libraries
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt

# 1. Bring in Marketwatch GME Data 
https://www.marketwatch.com/investing/stock/gme/download-data?startDate=11/1/2019&endDate=03/12/2021

In [12]:
gmepath = os.path.join('Data', 'GME.csv')

df = pd.read_csv(gmepath)

In [13]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,07/09/2024,24.6,25.18,24.0,24.6,9523548
1,07/08/2024,24.12,25.14,23.85,24.45,11815530
2,07/05/2024,24.18,25.08,23.82,24.18,11782060
3,07/03/2024,24.03,24.89,23.65,24.37,11829530
4,07/02/2024,23.24,24.32,23.1,23.98,13703520


In [14]:
# converting to date time format
df['Date'] = pd.to_datetime(df['Date'])
df.dtypes

Date      datetime64[ns]
Open             float64
High             float64
Low              float64
Close            float64
Volume            object
dtype: object

In [None]:
# setting the date column to the index as this is required by the gym environment
df.set_index('Date', inplace=True)
df.head()

In [33]:
env = gym.make('stocks-v0', df=df, frame_bound=(5,100), window_size=5)
# the window size defines what the agent sees
# the frame bound defines the range of data the agent can use to make decisions

In [18]:
env.signal_features

  logger.warn(


array([[ 2.460e+01,  0.000e+00],
       [ 2.445e+01, -1.500e-01],
       [ 2.418e+01, -2.700e-01],
       [ 2.437e+01,  1.900e-01],
       [ 2.398e+01, -3.900e-01],
       [ 2.333e+01, -6.500e-01],
       [ 2.469e+01,  1.360e+00],
       [ 2.509e+01,  4.000e-01],
       [ 2.420e+01, -8.900e-01],
       [ 2.493e+01,  7.300e-01],
       [ 2.365e+01, -1.280e+00],
       [ 2.393e+01,  2.800e-01],
       [ 2.559e+01,  1.660e+00],
       [ 2.470e+01, -8.900e-01],
       [ 2.522e+01,  5.200e-01],
       [ 2.870e+01,  3.480e+00],
       [ 2.912e+01,  4.200e-01],
       [ 2.546e+01, -3.660e+00],
       [ 3.049e+01,  5.030e+00],
       [ 2.483e+01, -5.660e+00],
       [ 2.822e+01,  3.390e+00],
       [ 4.655e+01,  1.833e+01],
       [ 3.157e+01, -1.498e+01],
       [ 2.650e+01, -5.070e+00],
       [ 2.800e+01,  1.500e+00],
       [ 2.314e+01, -4.860e+00],
       [ 2.261e+01, -5.300e-01],
       [ 2.124e+01, -1.370e+00],
       [ 2.378e+01,  2.540e+00],
       [ 1.900e+01, -4.780e+00],
       [ 1

# 2. Build Environment

In [19]:
env.action_space

Discrete(2)

In [40]:
state = env.reset()
while True: 
    action = env.action_space.sample()
    n_state, rewards, done, info = env.step(action)
    if done: 
        print("info", info)
        break
        
plt.figure(figsize=(15,6))
plt.cla()
env.render_all()
plt.show()

IndexError: invalid index to scalar variable.

# 3. Build Environment and Train

In [37]:
env_maker = lambda: gym.make('stocks-v0', df=df, frame_bound=(5,100), window_size=5)
env = DummyVecEnv([env_maker])

In [39]:
model = A2C('MlpPolicy', env, verbose=1) 
model.learn(total_timesteps=1000000)

Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 2125     |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.594   |
|    explained_variance | 0.0784   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.464    |
|    value_loss         | 1.6      |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 2507     |
|    iterations         | 200      |
|    time_elapsed       | 0        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.672   |
|    explained_variance | -0.0157  |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 0.617    |
|    value_loss      

<stable_baselines3.a2c.a2c.A2C at 0x312b34560>

# 4. Evaluation

In [46]:
output = env.step(action)

In [47]:
output

(array([[14.95, -0.27],
        [14.27, -0.68],
        [14.24, -0.03],
        [14.21, -0.03],
        [13.68, -0.53]], dtype=float32),
 0,
 False,
 False,
 {'total_reward': 0.0, 'total_profit': 1.0, 'position': <Positions.Short: 0>})

In [49]:
env = gym.make('stocks-v0', df=df, frame_bound=(90,110), window_size=5)
obs = env.reset()
while True: 
    obs = obs[0][np.newaxis, ...]  # Access the first element of the tuple
    action, _states = model.predict(obs)
    obs, rewards, done, info, _ = env.step(action)
    if done:
        print("info", info)
        break

ValueError: cannot reshape array of size 2 into shape (5,2)

In [None]:
plt.figure(figsize=(15,6))
plt.cla()
env.render_all()
plt.show()