In [1]:
import numpy as np
import pandas as pd
import gymnasium as gym

from env.environments import TradingEnv


## Read training data

In [2]:
def read_adjusted_history_ohlcv(path_csv):
    """Load an OHLCV price history from CSV into a date-indexed DataFrame.

    The CSV must provide a ``Date`` column plus ``Open``, ``High``, ``Low``,
    ``Close`` and ``Volume`` columns; the five value columns are read as
    floats and renamed to lower case.

    Parameters
    ----------
    path_csv : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by date, without rows containing missing values, and with
        at most one row per date (the first one in file order is kept).

    Notes
    -----
    Duplicates are detected on the date index.  ``DataFrame.drop_duplicates``
    is deliberately not used: it ignores the index, so it would discard
    distinct days that happen to share identical values while keeping
    repeated dates.
    No timezone normalisation is performed on the parsed dates.
    """
    ohlcv_columns = ('Open', 'High', 'Low', 'Close', 'Volume')
    history = pd.read_csv(
        path_csv,
        parse_dates=["Date"],
        index_col='Date',
        dtype={column: float for column in ohlcv_columns},
    )
    history = (
        history
        .rename(columns={column: column.lower() for column in ohlcv_columns})
        # Stable sort so that "first row per date" means first in file order.
        .sort_index(kind="mergesort")
        # Drop incomplete rows before de-duplicating so a complete row wins.
        .dropna()
    )
    is_first_for_date = ~history.index.duplicated(keep="first")
    # .copy() hands callers an independent frame they can add columns to.
    return history[is_first_for_date].copy()

# Load the raw price history from the local CSV (the file name suggests
# daily, adjusted IBM bars exported from yfinance).
price_history = read_adjusted_history_ohlcv('yfinance/ibm.1d.adjusted.csv')
# Bare last expression: the notebook renders the frame as the cell output.
price_history

Unnamed: 0_level_0,open,high,low,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02 00:00:00-05:00,1.543904,1.543904,1.526557,1.526557,1.948437e+06
1962-01-03 00:00:00-05:00,1.526556,1.539900,1.526556,1.539900,1.461328e+06
1962-01-04 00:00:00-05:00,1.539900,1.539900,1.523888,1.524555,1.311448e+06
1962-01-05 00:00:00-05:00,1.522554,1.522554,1.491862,1.494531,1.836027e+06
1962-01-08 00:00:00-05:00,1.493197,1.493197,1.454499,1.466509,2.735306e+06
...,...,...,...,...,...
2024-05-13 00:00:00-04:00,167.500000,168.059998,166.759995,167.559998,2.414900e+06
2024-05-14 00:00:00-04:00,167.860001,168.130005,166.479996,167.360001,2.601000e+06
2024-05-15 00:00:00-04:00,167.940002,168.350006,167.339996,168.259995,4.468800e+06
2024-05-16 00:00:00-04:00,168.259995,169.630005,167.789993,168.970001,3.492300e+06


## Generate features

In [3]:
# Derive reference prices and the "feature_*" columns in one chained
# expression, then drop the warm-up rows that contain NaN (the first
# pct_change row and the rows before the 252-row rolling window is full).
price_history = (
    price_history
    .assign(
        # Reference prices computed from each bar.
        median=lambda bars: (bars['high'] + bars['low']) / 2,
        typical=lambda bars: (bars['high'] + bars['low'] + bars['close']) / 3,
        # Open/high/low expressed relative to the same bar's close.
        feature_open=lambda bars: bars["open"] / bars["close"],
        feature_high=lambda bars: bars["high"] / bars["close"],
        feature_low=lambda bars: bars["low"] / bars["close"],
        # Close-to-close relative change.
        feature_close=lambda bars: bars["close"].pct_change(),
        # Volume scaled by its maximum over the trailing 252 rows
        # (presumably about one trading year of daily bars).
        feature_volume=lambda bars: bars["volume"] / bars["volume"].rolling(252).max(),
    )
    .dropna()
)
price_history

Unnamed: 0_level_0,open,high,low,close,volume,median,typical,feature_open,feature_high,feature_low,feature_close,feature_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1962-12-31 00:00:00-05:00,1.048241,1.054250,1.040229,1.041564,5.242114e+06,1.047240,1.045348,1.006410,1.012179,0.998718,-0.006369,0.157063
1963-01-02 00:00:00-05:00,1.041564,1.041564,1.026208,1.027543,1.194454e+07,1.033886,1.031772,1.013645,1.013645,0.998700,-0.013462,0.357879
1963-01-03 00:00:00-05:00,1.029547,1.058924,1.029547,1.058924,5.616550e+06,1.044235,1.049132,0.972257,1.000000,0.972257,0.030540,0.168282
1963-01-04 00:00:00-05:00,1.060259,1.070274,1.060259,1.060259,7.675956e+06,1.065266,1.063597,1.000000,1.009446,1.000000,0.001260,0.229985
1963-01-07 00:00:00-05:00,1.060259,1.070274,1.049577,1.051580,4.418352e+06,1.059926,1.057144,1.008254,1.017778,0.998095,-0.008186,0.132382
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-13 00:00:00-04:00,167.500000,168.059998,166.759995,167.559998,2.414900e+06,167.409996,167.459997,0.999642,1.002984,0.995226,0.002453,0.080063
2024-05-14 00:00:00-04:00,167.860001,168.130005,166.479996,167.360001,2.601000e+06,167.305000,167.323334,1.002988,1.004601,0.994742,-0.001194,0.086233
2024-05-15 00:00:00-04:00,167.940002,168.350006,167.339996,168.259995,4.468800e+06,167.845001,167.983332,0.998098,1.000535,0.994532,0.005378,0.148157
2024-05-16 00:00:00-04:00,168.259995,169.630005,167.789993,168.970001,3.492300e+06,168.709999,168.796666,0.995798,1.003906,0.993016,0.004220,0.115783


## Reduce the history to a small sample

In [4]:
# Keep only the first 15 rows (positional slice) so the run below stays short.
price_history = price_history.iloc[:15]
price_history

Unnamed: 0_level_0,open,high,low,close,volume,median,typical,feature_open,feature_high,feature_low,feature_close,feature_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1962-12-31 00:00:00-05:00,1.048241,1.05425,1.040229,1.041564,5242114.0,1.04724,1.045348,1.00641,1.012179,0.998718,-0.006369,0.157063
1963-01-02 00:00:00-05:00,1.041564,1.041564,1.026208,1.027543,11944540.0,1.033886,1.031772,1.013645,1.013645,0.9987,-0.013462,0.357879
1963-01-03 00:00:00-05:00,1.029547,1.058924,1.029547,1.058924,5616550.0,1.044235,1.049132,0.972257,1.0,0.972257,0.03054,0.168282
1963-01-04 00:00:00-05:00,1.060259,1.070274,1.060259,1.060259,7675956.0,1.065266,1.063597,1.0,1.009446,1.0,0.00126,0.229985
1963-01-07 00:00:00-05:00,1.060259,1.070274,1.049577,1.05158,4418352.0,1.059926,1.057144,1.008254,1.017778,0.998095,-0.008186,0.132382
1963-01-08 00:00:00-05:00,1.060259,1.077619,1.060259,1.077619,9023923.0,1.068939,1.071832,0.983891,1.0,0.983891,0.024762,0.270372
1963-01-09 00:00:00-05:00,1.077618,1.088301,1.071609,1.078286,9548140.0,1.079955,1.079399,0.999381,1.009288,0.993808,0.000619,0.286079
1963-01-10 00:00:00-05:00,1.080957,1.092307,1.080957,1.085631,7750841.0,1.086632,1.086298,0.995695,1.00615,0.995695,0.006811,0.232229
1963-01-11 00:00:00-05:00,1.085631,1.08897,1.078955,1.08897,6739857.0,1.083962,1.085631,0.996934,1.0,0.990803,0.003076,0.201938
1963-01-14 00:00:00-05:00,1.088968,1.107663,1.088968,1.106327,12356420.0,1.098315,1.100986,0.984309,1.001207,0.984309,0.01594,0.37022


## Run

In [5]:
# Custom reward, computed from the environment's history object.
def reward_function(history):
    """Return the log-return of the portfolio valuation over the last step.

    ``history`` is indexed with ``(column, position)`` pairs; positions -1 and
    -2 are the two most recent entries, so the result is log(p_t / p_t-1).
    """
    latest_valuation = history["portfolio_valuation", -1]
    previous_valuation = history["portfolio_valuation", -2]
    return np.log(latest_valuation / previous_valuation)

# Build the environment through the Gymnasium registry.
# NOTE(review): the "TradingEnv-v0" id is presumably registered as a side
# effect of importing `env.environments` at the top of the notebook -- confirm.
env = gym.make(
    "TradingEnv-v0",
    name="environment test",
    df=price_history,
    price_column_name='typical',  # column of df used as the traded price
    windows=3,  # the printed observation stacks 3 consecutive rows
    # Positions the agent may pick. The scale runs from -1 (=SHORT) to
    # +1 (=LONG); only 0 and 1 are enabled here, so shorting is not possible.
    positions=[0, 1],
    initial_position=0,  # 'random' is the alternative noted by the author
    trading_fees=0.01/100,  # 0.01% per stock buy / sell
    # Charged per timestep; the CSV loaded in this notebook holds daily bars,
    # so one timestep is one row (one trading day), not one hour.
    borrow_interest_rate=0.0003/100,
    reward_function=reward_function,
    portfolio_initial_value=1000,  # in FIAT (here, USD)
    max_episode_duration='max',  # alternatively a step count such as 500
)

# gym.make() returns the environment wrapped in Gymnasium wrappers. Reaching
# custom methods such as add_metric through the wrappers is deprecated and no
# longer works in Gymnasium 1.0, so register the metrics on the base env.
# NOTE: 'Episode Lenght' is misspelled, but the label is kept unchanged because
# it is the metric's runtime name and may be referenced elsewhere.
base_env = env.unwrapped
base_env.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
base_env.add_metric('Episode Lenght', lambda history : len(history['position']) )

# Roll out one episode with a uniformly random policy and print every step.
# Seed the action space first: sample() is the only source of randomness
# visible in this cell, and without a seed every run prints a different
# trajectory, which makes the recorded output impossible to reproduce.
env.action_space.seed(42)

done, truncated = False, False
observation, info = env.reset()
print('info', info)
print('observation', observation)
# The episode ends when the environment reports either flag.
while not (done or truncated):
    action = env.action_space.sample()
    observation, reward, done, truncated, info = env.step(action)
    print('done, truncated, action, reward', done, truncated, action, reward)
    print('info', info)
    print('observation', observation)
# Save for render (optional, left disabled by the author)
# env.save_for_render()

info {'idx': 2, 'step': 0, 'date': Timestamp('1963-01-03 00:00:00-0500', tz='UTC-05:00'), 'position_index': 0, 'position': 0, 'real_position': 0, 'data_typical': 1.0491317002355418, 'data_median': 1.0442354512779952, 'data_high': 1.0589241981506348, 'data_low': 1.0295467044053557, 'data_open': 1.0295467044053557, 'data_volume': 5616549.630300916, 'data_close': 1.0589241981506348, 'portfolio_valuation': 1000.0, 'portfolio_distribution_asset': 0, 'portfolio_distribution_fiat': 1000.0, 'portfolio_distribution_borrowed_asset': 0, 'portfolio_distribution_borrowed_fiat': 0, 'portfolio_distribution_interest_asset': 0, 'portfolio_distribution_interest_fiat': 0, 'reward': 0}
observation [[ 1.0064104   1.0121794   0.99871796 -0.00636941  0.15706289  0.
   0.        ]
 [ 1.0136452   1.0136452   0.9987004  -0.01346186  0.35787916  0.
   0.        ]
 [ 0.97225726  1.          0.97225726  0.03053997  0.16828163  0.
   0.        ]]
done, truncated, action, reward False False 0 0.0
info {'idx': 3, 'st

  logger.warn(
