In [1]:
import sys  
sys.path.append("./src")

import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    from stable_baselines3 import PPO
    import pandas as pd
    import numpy as np
    from gym_trading_env.environments import TradingEnv
    import gymnasium as gym
    from gym_trading_env.renderer import Renderer
    import torch as th



# NOTE(review): absolute, machine-specific path — the notebook only runs on a
# machine with this exact directory layout; consider a configurable data dir.
PRICE_CSV = r"C:\Users\user\Documents\GitHub\Gym-Trading-Env\pistar\data\SS00001.csv"

# Load the price/indicator table indexed by its parsed "date" column, then
# drop every row that still contains a missing value.
df = pd.read_csv(PRICE_CSV, parse_dates=["date"], index_col="date")
df.dropna(inplace=True)

# Min-max scale the close price to [0, 1].
# NOTE(review): min/max are taken over the whole file, so each row's feature
# depends on future prices — confirm this look-ahead is acceptable.
close_prices = df['close']
df['feature_close'] = (close_prices - close_prices.min()) / (close_prices.max() - close_prices.min())

# Expose the raw indicators unchanged under a "feature_" prefix. The order is
# kept (volume, RSI, MACD, CCI, ADX) because it fixes the column order of df.
for indicator in ("volume", "RSI", "MACD", "CCI", "ADX"):
    df[f"feature_{indicator}"] = df[indicator].copy()


def basic_reward_function(history):
    """Log-return of the portfolio valuation between the last two history entries.

    Args:
        history: object indexable as ``history[column, t]``.

    Returns:
        ``log(valuation[-1] / valuation[-2])``.
    """
    latest_valuation = history["portfolio_valuation", -1]
    previous_valuation = history["portfolio_valuation", -2]
    growth = latest_valuation / previous_valuation
    return np.log(growth)

def basic_reward_function_when_execute(history):
    """Log-return of the latest portfolio valuation relative to history entry 0.

    Args:
        history: object indexable as ``history[column, t]``.

    Returns:
        ``log(valuation[-1] / valuation[0])``.
    """
    latest_valuation = history["portfolio_valuation", -1]
    first_valuation = history["portfolio_valuation", 0]
    growth = latest_valuation / first_valuation
    return np.log(growth)

def sharpe_reward_function_when_execute(history):
    """Sharpe-style reward: sum of the latest ``portfolio_cum_return`` over its std.

    The latest ``portfolio_cum_return`` entry is presumably an array of
    returns (TODO confirm against the environment's history schema).

    Args:
        history: object indexable as ``history[column, t]``.

    Returns:
        ``sum(returns) / std(returns)``, or ``0.0`` when the ratio is undefined
        (no samples, a single or constant sample, or a non-finite std). A
        neutral finite value is returned because a NaN/inf reward would
        propagate into the learner's updates.
    """
    cum_returns = np.atleast_1d(
        np.asarray(history["portfolio_cum_return", -1], dtype=float)
    )
    if cum_returns.size == 0:
        return 0.0

    volatility = np.std(cum_returns)
    # std is 0 for one sample or constant samples, NaN if any sample is NaN.
    if volatility == 0 or not np.isfinite(volatility):
        return 0.0
    return np.sum(cum_returns) / volatility



def dynamic_feature_last_position_taken(history):
    """Return the most recent ``position`` entry recorded in the history."""
    latest_position = history['position', -1]
    return latest_position

def dynamic_feature_asset(history):
    """Return the most recent ``asset`` entry recorded in the history."""
    latest_asset = history['asset', -1]
    return latest_asset

def dynamic_feature_fiat(history):
    """Return the most recent ``fiat`` entry recorded in the history."""
    latest_fiat = history['fiat', -1]
    return latest_fiat

def dynamic_feature_step(history, step_scale=128):
    """Return the latest ``step`` counter divided by ``step_scale``.

    Args:
        history: object indexable as ``history[column, t]``.
        step_scale: divisor applied to the step counter. The default of 128
            equals the ``max_position_duration`` passed to ``gym.make`` in this
            notebook; bind another value (e.g. with ``functools.partial``) if
            that setting changes, so the feature keeps the same range.

    Returns:
        ``history['step', -1] / step_scale``.
    """
    return history['step', -1] / step_scale

# Callables handed to the environment as dynamic features; each one receives
# the history object and returns a single value.
dynamic_features = [
    dynamic_feature_last_position_taken,
    dynamic_feature_asset,
    dynamic_feature_fiat,
    dynamic_feature_step,
]

# Keyword arguments forwarded unchanged to the "TradingEnv" constructor.
env_kwargs = dict(
    name="stock",
    df=df,
    positions=[-2, -1, 0, 1, 2],
    dynamic_feature_functions=dynamic_features,
    reward_function_when_execute=basic_reward_function_when_execute,
    windows=1,
    trading_fees=0.04 / 100,  # 0.04 % expressed as a fraction
    portfolio_initial_value=1000,
    initial_position='random',
    max_episode_duration=256,
    max_position_duration=128,
    verbose=1,
    render_mode="logs",
    random_start=True,
)

env = gym.make("TradingEnv", **env_kwargs)

def _ratio_or_nan(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero or not finite."""
    if denominator == 0 or not np.isfinite(denominator):
        return float("nan")
    return numerator / denominator


def _sharpe_ratio_metric(history):
    """Sum of the latest ``portfolio_return`` entry divided by its standard deviation.

    Returns NaN (without emitting NumPy warnings) when there are no returns or
    their standard deviation is zero, instead of dividing by zero.
    """
    returns = np.atleast_1d(np.asarray(history['portfolio_return', -1], dtype=float))
    if returns.size == 0:
        return float("nan")
    return _ratio_or_nan(np.sum(returns), np.std(returns))


def _sortino_ratio_metric(history):
    """Sum of the latest ``portfolio_return`` entry divided by the std of its negative values.

    Returns NaN when there are no negative returns, or when their standard
    deviation is zero (e.g. a single negative return), since the ratio is
    undefined there.
    """
    # Same indexing form as the Sharpe metric, so both read the same entry.
    returns = np.atleast_1d(np.asarray(history['portfolio_return', -1], dtype=float))
    downside = returns[returns < 0]
    if downside.size == 0:
        return float("nan")
    return _ratio_or_nan(np.sum(returns), np.std(downside))


env.unwrapped.add_metric('Sharpe ratio', _sharpe_ratio_metric)
env.unwrapped.add_metric('Sortino ratio', _sortino_ratio_metric)
# Number of consecutive history entries whose position differs.
env.unwrapped.add_metric('Position Changes', lambda history: np.sum(np.diff(history['position']) != 0))
# Label spelling is left unchanged: it is the metric's displayed name.
env.unwrapped.add_metric('Episode Lenght', lambda history: len(history['position']))



  import cgi
  from urllib3.contrib.pyopenssl import orig_util_SSLContext as SSLContext
  from tensorflow.tsl.python.lib.core import pywrap_ml_dtypes
  from pandas.core import (


In [109]:
# Start a new episode, then print the latest value of selected history columns.
observation, info = env.reset()

episode_history = env.unwrapped.historical_info
for column in ("idx", "step", "position", "reward", "portfolio_return", "execute"):
    print(f"{column}: {episode_history[column, -1]}")
print(f"fiat: {episode_history['fiat', -1]} | asset: {episode_history['asset', -1]}")
print(f"portfolio_valuation: {episode_history['portfolio_valuation', -1]}")

# Displayed as the cell result.
observation.shape

idx: 343
step: 0
position: 0
reward: 0
portfolio_return: []
execute: False
fiat: 1000.0 | asset: 0.0
portfolio_valuation: 1000.0


(1, 10)

In [140]:
# Advance the environment by one step with action 2, then print the latest
# value of selected history columns.
observation, reward, done, truncated, info = env.step(2)
print("done:", done)
print()

episode_history = env.unwrapped.historical_info
for column in ("idx", "step", "position", "reward", "portfolio_return", "execute"):
    print(f"{column}: {episode_history[column, -1]}")
print(f"fiat: {episode_history['fiat', -1]} | asset: {episode_history['asset', -1]}")
print(f"portfolio_valuation: {episode_history['portfolio_valuation', -1]}")


# Displayed as the cell result.
observation

Market Return : -0.44%   |   Portfolio Return :  0.34%   |   Sharpe ratio : 0.24537967840074143   |   Sortino ratio : 0.7670640134528575   |   Position Changes : 3   |   Episode Lenght : 32   |   
done: True

idx: 374
step: 31
position: 0
reward: 0.01152402947677462
portfolio_return: [ 0.         -0.00040008 -0.01196142 -0.00928822  0.00043624 -0.00992681
  0.01380555  0.00469965 -0.00621408  0.00616919  0.00331857 -0.01231532
 -0.00867166  0.00725094 -0.00482462 -0.00553866 -0.0125418   0.00522762
 -0.00850466 -0.01204146 -0.02018883 -0.01455461  0.02208618  0.01491594
  0.00551047  0.05062102  0.0003868   0.00601826  0.01704476 -0.00899493
 -0.00817899]
execute: True
fiat: 999.2 | asset: 0.003664259791407387
portfolio_valuation: 1003.350638839523


array([[ 2.3862917e-02,  0.0000000e+00,  5.0874832e+01, -5.2322025e+00,
         6.9686714e+01,  2.6076548e+01,  0.0000000e+00,  3.6642598e-03,
         9.9920001e+02,  2.4218750e-01]], dtype=float32)

In [72]:
# Scratch check of np.sum on two hand-entered values
# (presumably copied from a portfolio_return printout — confirm).
np.sum([-0.02174972, -0.00079149])

-0.02254121

In [70]:
# Scratch check of np.std (population std, default ddof=0) on hand-entered
# values (presumably copied from a portfolio_return printout — confirm).
np.std([-0.02174972, -0.00079149,  0.00623871,  0.01041037,  0.00836543,  0.00373657,
  0.01275013,  0.00589088,  0.00718182, -0.00290046,  0.01138443])

0.00925666727207552

In [17]:
# NOTE(review): absolute, machine-specific path — consider a configurable
# model directory so the notebook runs elsewhere.
MODEL_ZIP = r"C:\Users\user\Documents\GitHub\Gym-Trading-Env\ppo_chart_2.zip"

# Load a previously saved PPO policy from disk.
model = PPO.load(MODEL_ZIP)

In [18]:
# Query the loaded policy for an action on the latest observation.
# NOTE(review): the recorded result is action 6, but the environment above is
# built with only 5 positions — the saved model was presumably trained with a
# different action space; confirm before passing this action to env.step.
model.predict(observation)

(array(6, dtype=int64), None)

In [3]:
# numpy is already imported as np in the first cell; this re-import is
# redundant but harmless.
import numpy as np

# Scratch evaluation of tanh on a single hand-entered value.
np.tanh(2.410369902185561)

0.9840072715261773