In [1]:
%pip install tensorflow-gpu==2.9.1 tensorflow==2.9.1 stable-baselines3 gym-anytrading gym

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Gym stuff
import gym
import gym_anytrading

# Stable baselines3 library
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [3]:
df = pd.read_csv(r"C:\Users\levir\OneDrive\Desktop\DataSets\kaggle_ethereum_data\ETH-USD.csv")

In [4]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,11/9/2017,308.644989,329.451996,307.056,320.884003,320.884003,893249984
1,11/10/2017,320.67099,324.717987,294.541992,299.252991,299.252991,885985984
2,11/11/2017,298.585999,319.453003,298.191986,314.681,314.681,842300992
3,11/12/2017,314.690002,319.153015,298.513,307.90799,307.90799,1613479936
4,11/13/2017,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984


In [5]:
df.dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1598 entries, 0 to 1597
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1598 non-null   object 
 1   Open       1598 non-null   float64
 2   High       1598 non-null   float64
 3   Low        1598 non-null   float64
 4   Close      1598 non-null   float64
 5   Adj Close  1598 non-null   float64
 6   Volume     1598 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 87.5+ KB


In [6]:
# We attempted to lower the detail in our data to get more memory since our kernel kept crashing.
#df[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']] = df[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']].astype('float32')
#print(df.dtypes)

In [7]:
# Checking data types to see if we need any type conversions
df['Date'] = pd.to_datetime(df['Date'])

In [8]:
# We do this now so our data is in chronological order and going from past -> present, previously it was reversed. This needs to be put in place before we make actions in our environment.
# Updating present dataframe and not making a copy. We're also setting our index. Basically the key value other values are associated with or stem from.
df.sort_values('Date', ascending=True, inplace=True)
df.set_index('Date', inplace=True)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-11-09,308.644989,329.451996,307.056,320.884003,320.884003,893249984
2017-11-10,320.67099,324.717987,294.541992,299.252991,299.252991,885985984
2017-11-11,298.585999,319.453003,298.191986,314.681,314.681,842300992
2017-11-12,314.690002,319.153015,298.513,307.90799,307.90799,1613479936
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984


In [9]:
# We don't need adjusted close price so we'll drop it from the data
df = df.drop('Adj Close', axis=1)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-11-09,308.644989,329.451996,307.056,320.884003,893249984
2017-11-10,320.67099,324.717987,294.541992,299.252991,885985984
2017-11-11,298.585999,319.453003,298.191986,314.681,842300992
2017-11-12,314.690002,319.153015,298.513,307.90799,1613479936
2017-11-13,307.024994,328.415009,307.024994,316.716003,1041889984


In [10]:
df.dtypes

Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object

In [11]:
# Creating environement for reinforcement learning bot to learn how to trade.
# Window_size is time steps for reference data for prediction, referencing previous data entries starting from prediction
# Hypothesis, higher window_size == more accurate prediction
# stocks-v0 is a trading enviornment built by library stable baselines
env = gym.make('stocks-v0', df=df, frame_bound=(5,250), window_size=5)

In [12]:
env.signal_features

array([[ 3.20884003e+02,  0.00000000e+00],
       [ 2.99252991e+02, -2.16310120e+01],
       [ 3.14681000e+02,  1.54280090e+01],
       [ 3.07907990e+02, -6.77301000e+00],
       [ 3.16716003e+02,  8.80801300e+00],
       [ 3.37631012e+02,  2.09150090e+01],
       [ 3.33356995e+02, -4.27401700e+00],
       [ 3.30924011e+02, -2.43298400e+00],
       [ 3.32394012e+02,  1.47000100e+00],
       [ 3.47612000e+02,  1.52179880e+01],
       [ 3.54385986e+02,  6.77398600e+00],
       [ 3.66730011e+02,  1.23440250e+01],
       [ 3.60401001e+02, -6.32901000e+00],
       [ 3.80652008e+02,  2.02510070e+01],
       [ 4.10165985e+02,  2.95139770e+01],
       [ 4.74911011e+02,  6.47450260e+01],
       [ 4.66276001e+02, -8.63501000e+00],
       [ 4.71329987e+02,  5.05398600e+00],
       [ 4.80355011e+02,  9.02502400e+00],
       [ 4.72902008e+02, -7.45300300e+00],
       [ 4.27523010e+02, -4.53789980e+01],
       [ 4.47114014e+02,  1.95910040e+01],
       [ 4.66540009e+02,  1.94259950e+01],
       [ 4.

In [13]:
# This shows us the quantitity of actions within our enviornment, we're in a stock trading environment which has the actions buy and sell, we only have long and short positions
# Sell=0
# Buy=1
# More info -> https://github.com/AminHP/gym-anytrading
env.action_space

Discrete(2)

In [14]:
state = env.reset()
while True: 
    action = env.action_space.sample()
    n_state, reward, done, info = env.step(action)
    if done: 
        print("info", info)
        break
        
#plt.figure(figsize=(15,6))
#plt.cla()
#env.render_all()
#plt.show()

info {'total_reward': -272.3791809999994, 'total_profit': 0.23241507953026172, 'position': 0}


In [15]:
%pip install finta

Note: you may need to restart the kernel to use updated packages.


In [16]:
from gym_anytrading.envs import StocksEnv
from finta import TA

In [17]:
df['SMA'] = TA.SMA(df, 12)
df['RSI'] = TA.RSI(df)
df['OBV'] = TA.OBV(df)
df.fillna(0, inplace=True)

In [18]:
def add_signals(env):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:, 'Low'].to_numpy()[start:end]
    signal_features = env.df.loc[:, ['Low', 'Volume','SMA', 'RSI', 'OBV']].to_numpy()[start:end]
    return prices, signal_features

In [19]:
class MyCustomEnv(StocksEnv):
    _process_data = add_signals
    
env2 = MyCustomEnv(df=df, window_size=12, frame_bound=(12,50))

In [20]:
env2.signal_features

array([[ 3.07056000e+02,  8.93249984e+08,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 2.94541992e+02,  8.85985984e+08,  0.00000000e+00,
         0.00000000e+00, -8.85985984e+08],
       [ 2.98191986e+02,  8.42300992e+08,  0.00000000e+00,
         4.34421109e+01, -4.36849920e+07],
       [ 2.98513000e+02,  1.61347994e+09,  0.00000000e+00,
         3.60400477e+01, -1.65716493e+09],
       [ 3.07024994e+02,  1.04188998e+09,  0.00000000e+00,
         4.83622903e+01, -6.15274944e+08],
       [ 3.16763000e+02,  1.06968000e+09,  0.00000000e+00,
         6.54055550e+01,  4.54405056e+08],
       [ 3.29812988e+02,  7.22665984e+08,  0.00000000e+00,
         6.09764989e+01, -2.68260928e+08],
       [ 3.23605988e+02,  7.97254016e+08,  0.00000000e+00,
         5.85460751e+01, -1.06551494e+09],
       [ 3.27523010e+02,  6.21732992e+08,  0.00000000e+00,
         5.95939960e+01, -4.43781952e+08],
       [ 3.27687012e+02,  6.49638976e+08,  0.00000000e+00,
         6.84778747e+01

In [21]:
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

In [22]:
model = A2C('MlpPolicy', env, verbose=1) 
model.learn(total_timesteps=10000)#00

Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 1250     |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.686   |
|    explained_variance | 0.027    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 11.5     |
|    value_loss         | 1.17e+03 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 1202     |
|    iterations         | 200      |
|    time_elapsed       | 0        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.683   |
|    explained_variance | -0.213   |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 43.2     |
|    value_loss      

<stable_baselines3.a2c.a2c.A2C at 0x18532bbf400>

In [23]:
env = MyCustomEnv(df=df, window_size=12, frame_bound=(80,250))
obs = env.reset()
while True: 
    obs = obs[np.newaxis, ...]
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        print("info", info)
        break

info {'total_reward': -433.0979610000001, 'total_profit': 0.3499950935865232, 'position': 0}
