# 0. Install and Import dependencies

In [1]:
# Gym stuff
import gymnasium as gym
import gym_anytrading

# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import DQN
from stable_baselines3 import A2C

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# gym trading and finta
from gym_anytrading.envs import StocksEnv
from finta import TA

from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
import os

# 1. Data import and data scaling

In [2]:
def _read_dated_csv(csv_path):
    """Read a CSV whose first column is the index and parse that index as datetimes."""
    frame = pd.read_csv(csv_path, index_col=0)
    frame.index = pd.to_datetime(frame.index)
    return frame


# Load the two CSV files used for training and for testing.
df_train = _read_dated_csv('data/df_train.csv')
df_test = _read_dated_csv('data/df_test.csv')

In [3]:
# Min-max scaler configured with a target range of [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))

In [4]:
# Fit the scaler on the training data only, then apply that same fitted
# scaling to the test data.
# fit_transform() / transform() hand back plain NumPy arrays, so both the
# column labels and the datetime index have to be passed back in explicitly;
# without `index=` the scaled frames would get a fresh 0..n-1 index and no
# longer line up with df_train / df_test.
df_train_scaled = pd.DataFrame(
    scaler.fit_transform(df_train),
    columns=df_train.columns,
    index=df_train.index,
)
df_test_scaled = pd.DataFrame(
    scaler.transform(df_test),
    columns=df_test.columns,
    index=df_test.index,
)
# NOTE(review): the environments in this notebook are constructed from
# df_train / df_test, not from these scaled frames — confirm that is intended.

In [5]:
# Columns used as signal features; read as a global by add_signals.
# Alternative (disabled): use every column except 'Price':
# features = df_train.drop('Price', axis=1).columns.tolist()
features = ['rsi', 'ema', 'roc']

# 2. Create the environment

In [6]:
def add_signals(env):
    """Build the price series and the feature matrix for a trading environment.

    Rows are selected by position from ``env.df``: the selection starts
    ``env.window_size`` rows before ``env.frame_bound[0]`` and stops just
    before ``env.frame_bound[1]``.

    Parameters
    ----------
    env
        Object exposing ``df`` (a DataFrame holding a 'Price' column plus the
        columns named in the module-level ``features`` list), ``frame_bound``
        and ``window_size``.

    Returns
    -------
    tuple
        ``(prices, signal_features)`` — the 'Price' values and the
        ``features`` columns of the selected rows, both as NumPy arrays.
    """
    first_row, last_row = env.frame_bound[0] - env.window_size, env.frame_bound[1]
    rows = slice(first_row, last_row)

    price_column = env.df['Price'].to_numpy()
    feature_matrix = env.df[features].to_numpy()

    return price_column[rows], feature_matrix[rows]

In [7]:
class MyCustomEnv(StocksEnv):
    """StocksEnv whose ``_process_data`` hook is replaced by ``add_signals``."""

    # Assigned as a class attribute, so it is looked up like a method and the
    # environment instance is what add_signals receives as its ``env`` argument.
    _process_data = add_signals


# Training environment built on df_train.
env2 = MyCustomEnv(
    df=df_train,
    window_size=12,
    frame_bound=(12, 440473),
)

# 3. Build Environment and Train

In [8]:
# Directory passed to the model as its TensorBoard log location.
log_path = os.path.join('Training', 'Logs')

In [9]:
def env_maker():
    """Return the already-constructed training environment (env2)."""
    return env2


# DummyVecEnv receives a list of environment factories; one is supplied here.
env = DummyVecEnv([env_maker])

In [10]:
# A2C agent with an MLP policy on the vectorised training environment;
# training metrics are written under log_path for TensorBoard.
model = A2C(
    policy='MlpPolicy',
    env=env,
    learning_rate=0.001,
    verbose=2,
    tensorboard_log=log_path,
)

Using cpu device


In [11]:
# Train for two timesteps per row of the training frame, with log_interval=1000.
n_timesteps = 2 * len(df_train)
model.learn(total_timesteps=n_timesteps, log_interval=1000)

Logging to Training\Logs\A2C_2
------------------------------------
| time/                 |          |
|    fps                | 937      |
|    iterations         | 1000     |
|    time_elapsed       | 5        |
|    total_timesteps    | 5000     |
| train/                |          |
|    entropy_loss       | -0.683   |
|    explained_variance | -0.00148 |
|    learning_rate      | 0.001    |
|    n_updates          | 999      |
|    policy_loss        | -0.00125 |
|    value_loss         | 3.66e-06 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 987      |
|    iterations         | 2000     |
|    time_elapsed       | 10       |
|    total_timesteps    | 10000    |
| train/                |          |
|    entropy_loss       | -0.608   |
|    explained_variance | 0        |
|    learning_rate      | 0.001    |
|    n_updates          | 1999     |
|    policy_loss        | -0.00065 |
|    va

<stable_baselines3.a2c.a2c.A2C at 0x23b5aa967d0>

In [12]:
# Persist the trained agent to disk.
model.save("Training/Saved Models/A2C")

# 4. Evaluation

In [13]:
# Run the trained policy once through an environment built on the test data.
# NOTE(review): 224933 is presumably the number of rows in df_test — confirm.
env_test = MyCustomEnv(df=df_test, window_size=12, frame_bound=(12, 224933))
obs, _ = env_test.reset()  # reset() returns (observation, info)

while True:
    action, _states = model.predict(obs)
    # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
    # Both end-of-episode flags are checked so the loop stops whichever one
    # the environment uses to signal that the episode has finished.
    obs, rewards, terminated, truncated, info = env_test.step(action)
    done = terminated or truncated
    if done:
        print("info", info)
        break

# Assumes render_all() returns a (short_ticks, long_ticks) pair — TODO confirm
# against the installed gym_anytrading version.
short_ticks, long_ticks = env_test.render_all()

info {'total_reward': 0.0, 'total_profit': 0.7336933768734825, 'position': <Positions.Long: 1>}


In [None]:
# Plot the test price series and mark the ticks where the agent was long.
fig, ax = plt.subplots(figsize=(22, 8))  # Wide figure for better visibility
ax.plot(df_test.index, df_test['Price'], label='Price', color='blue')

# long_ticks / short_ticks are row positions, whereas the price line is drawn
# against df_test's datetime index. Select the rows by position and use their
# index labels as x so the markers sit on the price curve instead of being
# placed at raw integer x values.
long_rows = df_test.iloc[long_ticks]
ax.scatter(long_rows.index, long_rows['Price'], color='green', label='Long Positions', marker='^')  # Green upward-pointing triangles

# Optional: also mark short positions (disabled).
# short_rows = df_test.iloc[short_ticks]
# ax.scatter(short_rows.index, short_rows['Price'], color='red', label='Short Positions', marker='v')  # Red downward-pointing triangles

ax.set_title('Stock Prices with Trading Positions')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()  # Explain what each marker represents
plt.show()

In [None]:
# Display the short-position ticks obtained from env_test.render_all().
short_ticks