In [1]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from stable_baselines3 import PPO, A2C

In [2]:
class EnhancedStockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        super(EnhancedStockTradingEnv, self).__init__()

        self.df = df
        self.reward_range = (0, 1)
        
        # Assuming 'Symbols' represents different assets, and you might want to trade multiple assets
        # Adjust according to your exact dataset and requirements
        self.n_assets = len(df['Symbols'].unique())
        
        # Action space: [sell(0), hold(1), buy(2)] for each asset
        self.action_space = spaces.MultiDiscrete([3] * self.n_assets)
        
        # Observation space: Include all features for each asset
        self.feature_names = ['Open', 'Adj Close', 'High', 'Low', 'Volume', 'RSI', 'MACD', 'post utility']
        self.n_features = len(self.feature_names)
        
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.n_assets *
                                                                             self.n_features,), dtype=np.float32)
        
        self.current_step = 0
        self.done = False

        self.initial_account_balance = 1000000
        self.current_account_balance = self.initial_account_balance
        self.positions = np.zeros(self.n_assets)
        self.total_profit = 0

    def _next_observation(self):
        obs = np.zeros(self.n_assets * self.n_features)
        for i, symbol in enumerate(self.df['Symbols'].unique()):
            # Filter df by symbol and step
            symbol_df = self.df[self.df['Symbols'] == symbol].reset_index(drop=True)
            if self.current_step < len(symbol_df):
                step_data = symbol_df.loc[self.current_step, self.feature_names]
                obs[i*self.n_features:(i+1)*self.n_features] = step_data.values
        return obs

    def step(self, action):
        prev_total_value = self._calculate_total_portfolio_value()
        self.done = self.current_step >= (len(self.df) // self.n_assets) - 1
        current_prices = self._get_current_prices()

        transaction_cost = 0.001  # transaction cost: 0.1%
        investment_fraction = 0.1  # Example: use 10% of current balance for each buy action

        for i, act in enumerate(action):
            # Current price of the asset
            current_price = current_prices[i]

            if act == 0:  # Sell
                if self.positions[i] > 0:  # Check if we hold the asset
                    # Update account balance, consider transaction costs
                    self.current_account_balance += (1 - transaction_cost) * current_price * self.positions[i]
                    self.positions[i] = 0
            elif act == 2 and current_price > 0:
                # Calculate max possible shares to buy with the allocated fraction of the balance
                allocated_balance = self.current_account_balance * investment_fraction
                max_shares = allocated_balance / (current_price * (1 + transaction_cost))
                shares_to_buy = max_shares  # Adjust this logic if needed
                self.current_account_balance -= shares_to_buy * current_price * (1 + transaction_cost)
                self.positions[i] += shares_to_buy

        self.current_step += 1

        # Update total profit based on the portfolio value change
        current_total_value = self._calculate_total_portfolio_value()
        self.total_profit += current_total_value - prev_total_value

        next_obs = self._next_observation()
        reward = self.total_profit  # Consider refining this based on your objective
        info = {'current_balance': self.current_account_balance, 'total_profit': self.total_profit}

        return next_obs, reward, self.done, info


    def _calculate_total_portfolio_value(self):
        """Calculate the total value of the portfolio: cash + assets"""
        current_prices = self._get_current_prices()
        assets_value = np.dot(self.positions, current_prices)
        total_value = self.current_account_balance + assets_value
        return total_value

    def _get_current_prices(self):
        """Get the current prices of assets"""
        current_prices = np.zeros(self.n_assets)
        for i, symbol in enumerate(self.df['Symbols'].unique()):
            # Filter df by symbol and step
            symbol_df = self.df[self.df['Symbols'] == symbol].reset_index(drop=True)
            if self.current_step < len(symbol_df):
                current_prices[i] = symbol_df.loc[self.current_step, 'Adj Close']
        return current_prices


    def reset(self):
        self.current_step = 0
        self.done = False
        self.positions = np.zeros(self.n_assets)
        self.current_account_balance = self.initial_account_balance
        self.total_profit = 0
        return self._next_observation()

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}, Total Profit: {self.total_profit}')
        
    def reset_with_data(self, new_df):
        # Method to reset the environment with new data
        self.df = new_df
        self.reset()

In [3]:
df = pd.read_csv('stocks_data/stocks_panel_data_with_utlity.csv', index_col=0)
df = df.fillna(0)

In [4]:
df.sort_values('Date', inplace=True)
df.reset_index(inplace=True, drop=True)
df['Date'] = pd.to_datetime(df['Date'])
df_train = df[df['Date']< pd.to_datetime('2023-12-24')]
df_test = df[df['Date']> pd.to_datetime('2023-12-24')]
df_test

Unnamed: 0,Open,Adj Close,High,Low,Volume,Date,RSI,MACD,Symbols,post utility
23676,690.000,688.000,699.0000,682.0000,38876.0,2023-12-25,57.819905,-9.714975,LENT,-3.294974
23677,313.900,311.370,317.8300,307.9000,15568635.0,2023-12-25,69.583896,3.819175,MTLR,0.000000
23678,0.731,0.720,0.7314,0.7174,216418000.0,2023-12-25,35.370879,-0.019381,HYDR,-578.593915
23679,93.960,94.400,95.6000,91.5400,1125730.0,2023-12-25,63.959391,-0.800957,RENI,-313.799274
23680,643.800,644.800,655.0000,643.8000,39654.0,2023-12-25,52.531646,-7.448759,LSRG,13.091304
...,...,...,...,...,...,...,...,...,...,...
27169,55.900,56.245,56.4500,55.9000,9558260.0,2024-03-26,59.504132,0.125369,MAGN,0.000000
27170,180.300,183.200,184.6000,180.0000,1579575.0,2024-03-26,81.838565,7.595501,RNFT,0.000000
27171,99.380,101.400,101.6600,99.3800,533380.0,2024-03-26,43.605359,0.155967,RENI,0.000000
27172,900.500,904.500,910.0000,900.5000,48222.0,2024-03-26,54.625551,3.358290,AQUA,0.000000


In [5]:
env = EnhancedStockTradingEnv(df_train)

model = PPO("MlpPolicy", env, verbose=1, learning_rate=1e-4)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [None]:
model.learn(total_timesteps=100000)

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 422       |
|    ep_rew_mean     | -8.57e+06 |
| time/              |           |
|    fps             | 5         |
|    iterations      | 1         |
|    time_elapsed    | 405       |
|    total_timesteps | 2048      |
----------------------------------


In [None]:
env.reset_with_data(df_test)
portfolio_values = [1000000]
obs = env.reset()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, done, info = env.step(action)
    portfolio_values.append(env._calculate_total_portfolio_value())

initial_portfolio_value = portfolio_values[0]
pnl_ppo = [((pv - initial_portfolio_value) / initial_portfolio_value) * 100 for pv in portfolio_values] 

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(pnl_ppo, label="PnL")
plt.xlabel("Time Steps")
plt.ylabel("Profit and Loss (%)")
plt.title("Profit and Loss over Time")
plt.legend()
plt.show()

In [None]:
# Calculate daily returns
returns = np.diff(portfolio_values) / portfolio_values[:-1]

# Compute Sharpe ratio
mean_return = np.mean(returns)
std_return = np.std(returns)
sharpe_ratio = (mean_return) / std_return if std_return != 0 else 0

print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

In [None]:
class EnhancedStockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        super(EnhancedStockTradingEnv, self).__init__()

        self.df = df
        self.reward_range = (0, 1)
        
        # Assuming 'Symbols' represents different assets, and you might want to trade multiple assets
        # Adjust according to your exact dataset and requirements
        self.n_assets = len(df['Symbols'].unique())
        
        # Action space: [sell(0), hold(1), buy(2)] for each asset
        self.action_space = spaces.MultiDiscrete([3] * self.n_assets)
        
        # Observation space: Include all features for each asset
        self.feature_names = ['Open', 'Adj Close', 'High', 'Low', 'Volume', 'RSI', 'MACD']
        self.n_features = len(self.feature_names)
        
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.n_assets *
                                                                             self.n_features,), dtype=np.float32)
        
        self.current_step = 0
        self.done = False

        self.initial_account_balance = 1000000
        self.current_account_balance = self.initial_account_balance
        self.positions = np.zeros(self.n_assets)
        self.total_profit = 0

    def _next_observation(self):
        obs = np.zeros(self.n_assets * self.n_features)
        for i, symbol in enumerate(self.df['Symbols'].unique()):
            # Filter df by symbol and step
            symbol_df = self.df[self.df['Symbols'] == symbol].reset_index(drop=True)
            if self.current_step < len(symbol_df):
                step_data = symbol_df.loc[self.current_step, self.feature_names]
                obs[i*self.n_features:(i+1)*self.n_features] = step_data.values
        return obs

    def step(self, action):
        prev_total_value = self._calculate_total_portfolio_value()
        self.done = self.current_step >= (len(self.df) // self.n_assets) - 1
        current_prices = self._get_current_prices()

        transaction_cost = 0.001  # transaction cost: 0.1%
        investment_fraction = 0.1  # Example: use 10% of current balance for each buy action

        for i, act in enumerate(action):
            # Current price of the asset
            current_price = current_prices[i]

            if act == 0:  # Sell
                if self.positions[i] > 0:  # Check if we hold the asset
                    # Update account balance, consider transaction costs
                    self.current_account_balance += (1 - transaction_cost) * current_price * self.positions[i]
                    self.positions[i] = 0
            elif act == 2 and current_price > 0:
                # Calculate max possible shares to buy with the allocated fraction of the balance
                allocated_balance = self.current_account_balance * investment_fraction
                max_shares = allocated_balance / (current_price * (1 + transaction_cost))
                shares_to_buy = max_shares  # Adjust this logic if needed
                self.current_account_balance -= shares_to_buy * current_price * (1 + transaction_cost)
                self.positions[i] += shares_to_buy

        self.current_step += 1

        # Update total profit based on the portfolio value change
        current_total_value = self._calculate_total_portfolio_value()
        self.total_profit += current_total_value - prev_total_value

        next_obs = self._next_observation()
        reward = self.total_profit  # Consider refining this based on your objective
        info = {'current_balance': self.current_account_balance, 'total_profit': self.total_profit}

        return next_obs, reward, self.done, info


    def _calculate_total_portfolio_value(self):
        """Calculate the total value of the portfolio: cash + assets"""
        current_prices = self._get_current_prices()
        assets_value = np.dot(self.positions, current_prices)
        total_value = self.current_account_balance + assets_value
        return total_value

    def _get_current_prices(self):
        """Get the current prices of assets"""
        current_prices = np.zeros(self.n_assets)
        for i, symbol in enumerate(self.df['Symbols'].unique()):
            # Filter df by symbol and step
            symbol_df = self.df[self.df['Symbols'] == symbol].reset_index(drop=True)
            if self.current_step < len(symbol_df):
                current_prices[i] = symbol_df.loc[self.current_step, 'Adj Close']
        return current_prices


    def reset(self):
        self.current_step = 0
        self.done = False
        self.positions = np.zeros(self.n_assets)
        self.current_account_balance = self.initial_account_balance
        self.total_profit = 0
        return self._next_observation()

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}, Total Profit: {self.total_profit}')
        
    def reset_with_data(self, new_df):
        # Method to reset the environment with new data
        self.df = new_df
        self.reset()

In [None]:
env = EnhancedStockTradingEnv(df_train.drop('post utility', axis=1))

model = PPO("MlpPolicy", env, verbose=1, learning_rate=1e-4)

model.learn(total_timesteps=100000)

In [None]:
env.reset_with_data(df_test)
portfolio_values = [1000000]
obs = env.reset()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, done, info = env.step(action)
    portfolio_values.append(env._calculate_total_portfolio_value())

initial_portfolio_value = portfolio_values[0]
pnl_ppo_without_sent = [((pv - initial_portfolio_value) / 
                         initial_portfolio_value) * 100 for pv in portfolio_values] 

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(pnl_ppo_without_sent, label="PnL")
plt.xlabel("Time Steps")
plt.ylabel("Profit and Loss (%)")
plt.title("Profit and Loss over Time")
plt.legend()
plt.show()

In [None]:
# Calculate daily returns
returns = np.diff(portfolio_values) / portfolio_values[:-1]

# Compute Sharpe ratio
mean_return = np.mean(returns)
std_return = np.std(returns)
sharpe_ratio = (mean_return) / std_return if std_return != 0 else 0

print(f"Sharpe Ratio: {sharpe_ratio:.2f}")