In [None]:
import gymnasium as gym
import gym_anytrading

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C, PPO

import numpy as np
import pandas as pd
from matplotlib import pylab as plt

# DATA PREPROCESSING

In [None]:
# Load the price data from the CSV file into a DataFrame and preview the first rows.
csv_path = './gmedata.csv'
df = pd.read_csv(csv_path)
df.head(5)

In [None]:
# Convert the "Date" column to datetime values, then show the column dtypes
# so the conversion can be checked at a glance.
parsed_dates = pd.to_datetime(df["Date"])
df["Date"] = parsed_dates
df.dtypes

In [None]:
# Use "Date" as the index. The guard keeps this cell safe to re-run: once "Date"
# has been moved into the index it is no longer a column, and calling set_index
# on it a second time would raise KeyError.
if "Date" in df.columns:
    df = df.set_index("Date")
df.head()

# SETTING UP ENVIRONMENT AND RANDOM TESTING 

In [None]:
# Create the 'stocks-v0' trading environment on df.
# frame_bound and window_size are passed through to gym-anytrading; presumably
# frame_bound is the (start, end) row range of df and window_size the number of
# past rows in each observation — confirm against the gym-anytrading docs.
env = gym.make(
    'stocks-v0',
    df=df,
    window_size=5,
    frame_bound=(5, 100),
)

In [None]:
# gym.make() returns the environment wrapped in Gymnasium wrappers, which do not
# forward custom attributes such as signal_features. Read it from the underlying
# gym-anytrading env through .unwrapped.
env.unwrapped.signal_features

In [None]:
env.action_space.sample() # Discrete action space with two actions; in gym-anytrading's Actions enum 0 = Sell and 1 = Buy (verify against the installed version)

In [None]:
# Play one episode with uniformly random actions to sanity-check the environment.
# Gymnasium's reset() returns an (observation, info) pair, so unpack it rather
# than binding the whole tuple to `state`.
state, info = env.reset()
while True:
    action = env.action_space.sample()
    n_state, reward, terminated, truncated, info = env.step(action)
    # The episode is over when the env reports either termination or truncation.
    if terminated or truncated:
        print(info)
        break

# render_all() is defined on the underlying gym-anytrading env; reach it through
# .unwrapped because Gymnasium wrappers do not forward arbitrary attributes.
plt.figure(figsize=(15, 6))
plt.cla()
env.unwrapped.render_all()
plt.show()

# TRAINING AND EVALUATING THE MODEL

In [None]:
def env_maker():
    """Return a new 'stocks-v0' environment built with the training settings."""
    return gym.make("stocks-v0", df=df, frame_bound=(5, 100), window_size=5)


# DummyVecEnv takes a list of zero-argument factories; a single one is given here.
env = DummyVecEnv([env_maker])

In [None]:
# stable_baselines3's PPO only knows the "MlpPolicy", "CnnPolicy" and
# "MultiInputPolicy" aliases; asking for "MlpLstmPolicy" raises ValueError at
# construction time (recurrent policies are provided separately by
# sb3_contrib.RecurrentPPO). Use the plain MLP policy so training can run.
model = PPO('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=100_000)


In [None]:
# Run the trained policy for one episode on the (80, 100) window of df.
# NOTE(review): this window lies inside the (5, 100) range used to build the
# training env, so the result is in-sample — consider evaluating on a later,
# unseen frame_bound if df has enough rows.
env = gym.make("stocks-v0", df=df, frame_bound=(80, 100), window_size=5)
# Gymnasium's reset() returns (observation, info); indexing that tuple with
# np.newaxis would raise TypeError, so unpack the observation first.
state, info = env.reset()

while True:
    # Add a leading batch axis so predict() receives a batch of one observation.
    batched_state = state[np.newaxis, ...]
    action, _states = model.predict(batched_state)
    # predict() hands back an array of actions for the batch; give the env a plain int.
    state, reward, terminated, truncated, info = env.step(int(np.asarray(action).item()))
    if terminated or truncated:
        print("Info:", info)
        break

In [None]:
# Plot the evaluation episode. render_all() is defined on the underlying
# gym-anytrading env, so call it through .unwrapped rather than on the wrappers
# that gym.make() adds (Gymnasium wrappers do not forward arbitrary attributes).
plt.figure(figsize=(15, 6))
plt.cla()
env.unwrapped.render_all()
plt.show()