# Asset Portfolio Management using Deep Reinforcement Learning
---

## 6.0 Deep Reinforcement Learning Portfolio with Transaction Costs

### 6.1 Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pylab as plt
matplotlib.use('Agg')
import datetime
from pypfopt import EfficientFrontier, objective_functions
from pypfopt import risk_models
from pypfopt import expected_returns
from gym.utils import seeding
import gym
from gym import spaces
matplotlib.use('Agg')
from stable_baselines3.common.vec_env import DummyVecEnv

In [2]:
#pip install shap

In [3]:
#pip install shimmy

In [4]:
#pip install stockstats

In [5]:
#pip install exchange-calendars

In [6]:
#pip install wrds

In [7]:
#pip install ipywidgets

### 6.2 Load Data

In [9]:
# Read the pre-split training and test sets from the local datasets folder.
train_data = pd.read_csv(filepath_or_buffer='./datasets/train_data.csv')
test_data = pd.read_csv(filepath_or_buffer='./datasets/test_data.csv')

In [10]:
# Column names handed to the environment through its `tech_indicator_list`
# keyword argument.
tech_indicator_list = [
    "open",
    "high",
    "low",
    "close",
    "volume",
    "atr",
    "sma",
    "rsi",
    "oil_chg",
    "yield_chg",
]

In [11]:
#train_data

### 6.3 Implement DRL

In [13]:
import importlib
import TC_my_env
from TC_my_env import StockPortfolioEnv
import gym

In [14]:
import TC_my_models
from TC_my_models import DRLAgent

In [15]:
# One dimension per distinct ticker in the training set; the state-space size
# is set equal to that same count.
stock_dimension = state_space = len(train_data["tic"].unique())
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 10, State Space: 10


In [16]:
# Equal-weight starting allocation: one entry of 1/N for each of the N assets.
weights_initial = [1 / stock_dimension for _ in range(stock_dimension)]

In [17]:
# Keyword arguments unpacked into StockPortfolioEnv(...) when the training and
# trading environments are constructed.
# NOTE(review): "state_space" is hard-coded to 15 here, while the `state_space`
# variable computed earlier equals stock_dimension -- confirm which value the
# environment actually expects.
env_kwargs = {
    "hmax": 500,
    "initial_amount": 1_000_000,
    "transaction_cost_pct": 0.001,
    "state_space": 15,
    "stock_dim": stock_dimension,
    "tech_indicator_list": tech_indicator_list,
    "action_space": stock_dimension,
    "reward_scaling": 1,
    # Equal-weight allocation, built as its own list (same values as weights_initial).
    "initial_weights": [1 / stock_dimension for _ in range(stock_dimension)],
}

In [18]:
#print(train_data.dtypes)

In [19]:
#train_data

In [20]:
#Date conversion
#train_data['date'] = pd.to_datetime(train_data['date'], errors='coerce')
#train_data['date'] = train_data['date'].map(pd.Timestamp.timestamp).astype('float64')
#tic conversion
#train_data['tic'] = train_data['tic'].astype('category').cat.codes.astype('float64')
#volume and obv conversion
#train_data['obv'] = train_data['obv'].astype('float64')
#train_data['volume'] = train_data['volume'].astype('float64')
#print(train_data.dtypes)

In [21]:
# Build the training environment from the training frame and the shared settings.
e_train_gym = StockPortfolioEnv(df=train_data, **env_kwargs)

In [22]:
# get_sb_env() yields a pair; only the first element (the environment handed to
# the agent) is kept here, the second is discarded.
env_train, _ = e_train_gym.get_sb_env()
# Show the wrapper type to confirm what kind of environment was returned.
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>




In [23]:
# Unwrap the first environment held by the vectorised wrapper so its attributes
# can be inspected directly (expected to be the StockPortfolioEnv built above --
# confirm).
real_env = env_train.envs[0]
#print("Shape of self.data.close:", real_env.data.close.shape)
#print("Shape of self.action space:", real_env.action_space.shape)
#print("Shape of self.state", real_env.state.shape)
#print("Length of date_memory:", len(real_env.date_memory))
#print(real_env.data['date'].iloc[0])  # Should print the first date
#print("Length of actions_memory:", len(real_env.actions_memory))
#actions = real_env.actions_memory[-1]  # Replace this with how you're storing actions
#weights = real_env.softmax_normalization(actions)
#print("Shape of weights:", weights.shape)
#print("Weights:", weights)

In [24]:
#real_env.df

In [25]:
#real_env.state

In [26]:
#real_env.data.close

#### 6.3.1 Model: A2C

In [28]:
# Wrap the training environment in the project's agent helper.
agent = DRLAgent(env=env_train)

# Settings passed to agent.get_model() as `model_kwargs` for the A2C model.
# NOTE(review): the recorded cell output reports learning_rate 0.0003 rather
# than the 0.0002 set here -- re-run to confirm which value was actually used.
A2C_PARAMS = {
    # Number of environment steps rolled out per update
    "n_steps": 10,
    # Discount factor
    "gamma": 0.95,
    # Optimizer learning rate
    "learning_rate": 0.0002,
    # Entropy coefficient (exploration)
    "ent_coef": 0.01,
    # Value-function coefficient
    "vf_coef": 0.5,
    # Maximum norm used for gradient clipping
    "max_grad_norm": 0.3,
}
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)

{'n_steps': 10, 'gamma': 0.95, 'learning_rate': 0.0003, 'ent_coef': 0.01, 'vf_coef': 0.5, 'max_grad_norm': 0.3}
Using cpu device


In [56]:
from stable_baselines3.common.callbacks import BaseCallback
import matplotlib.pyplot as plt
import os

class PortfolioMonitorCallback(BaseCallback):
    """
    Training callback that records the environment's per-step reward and its
    running total, then saves both series as PNG plots when training finishes.

    Files written:
        results/TC_portfolio_rewards.png   -- reward at every step
        results/TC_cumulative_rewards.png  -- running sum of those rewards
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        # Per-step rewards, in the order they were observed.
        self.portfolio_rewards = []
        # Running total after each step, plus the current total itself.
        self.cumulative_rewards = []
        self.cumulative_reward = 0.0
        # Create the output folder up front so the plots can be saved later.
        os.makedirs("results", exist_ok=True)

    def _on_step(self) -> bool:
        """Record the latest reward; always returns True so training continues."""
        # The reward is read from the first wrapped environment's `reward` attribute.
        latest_reward = self.model.env.envs[0].reward

        self.portfolio_rewards.append(latest_reward)
        self.cumulative_reward += latest_reward
        self.cumulative_rewards.append(self.cumulative_reward)

        return True

    @staticmethod
    def _save_curve(series, label, ylabel, path, **line_kwargs):
        """Draw one green line plot of `series` and save it to `path`."""
        plt.figure(figsize=(8, 4))
        plt.plot(series, label=label, color='green', **line_kwargs)
        plt.xlabel("Time Step")
        plt.ylabel(ylabel)
        plt.legend()
        plt.savefig(path)
        # Close the figure so it is neither displayed nor kept in memory.
        plt.close()

    def _on_training_end(self) -> None:
        """Write the reward and cumulative-reward plots to the results folder."""
        # The per-step curve uses a very thin line (linewidth=0.1).
        self._save_curve(
            self.portfolio_rewards,
            "Portfolio Reward",
            "Reward",
            "results/TC_portfolio_rewards.png",
            linewidth=0.1,
        )
        self._save_curve(
            self.cumulative_rewards,
            "Cumulative Reward",
            "Cumulative Reward",
            "results/TC_cumulative_rewards.png",
        )

In [64]:
# Re-create the A2C model from A2C_PARAMS and attach a fresh reward monitor
# before training.
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)
monitor_callback = PortfolioMonitorCallback()

# Train for 100k timesteps; logs are written under the 'a2c' TensorBoard name.
trained_a2c = agent.train_model(
    model=model_a2c,
    callback=monitor_callback,
    tb_log_name='a2c',
    total_timesteps=100_000,
    log_interval=10,
)

{'n_steps': 10, 'gamma': 0.95, 'learning_rate': 0.0003, 'ent_coef': 0.01, 'vf_coef': 0.5, 'max_grad_norm': 0.3}
Using cpu device
Logging to tensorboard_log/a2c\a2c_417
------------------------------------
| time/                 |          |
|    fps                | 254      |
|    iterations         | 10       |
|    time_elapsed       | 0        |
|    total_timesteps    | 100      |
| train/                |          |
|    entropy_loss       | -14.2    |
|    explained_variance | -50.1    |
|    learning_rate      | 0.0003   |
|    n_updates          | 9        |
|    policy_loss        | 0.0174   |
|    std                | 1        |
|    value_loss         | 0.0448   |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 254      |
|    iterations         | 20       |
|    time_elapsed       | 0        |
|    total_timesteps    | 200      |
| train/                |          |
|    entropy_loss 

In [74]:
# Load the TensorBoard notebook extension and open it on the training logs.
%load_ext tensorboard
# Alternative: show only the A2C runs on an explicit host/port.
#%tensorboard --logdir tensorboard_log/a2c --host localhost --port 6008
%tensorboard --logdir tensorboard_log

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 23920), started 4 days, 5:50:19 ago. (Use '!kill 23920' to kill it.)

### 6.4 Fitting Model on Training Data

### 6.5 Backtesting

In [66]:
# A2C backtest: build an environment on the test data (same settings as
# training) and run the trained model through it.
e_trade_gym = StockPortfolioEnv(df=test_data, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

# DRL_prediction's result is unpacked into three objects; each is written to
# CSV afterwards.
(
    TC_a2c_test_daily_return,
    TC_a2c_test_weights,
    TC_a2c_test_transaction_costs,
) = DRLAgent.DRL_prediction(
    model=trained_a2c,
    test_data=test_data,
    test_env=env_trade,
    test_obs=obs_trade,
)



In [67]:
# Persist the backtest outputs. The target folder is created first because
# to_csv does not create missing parent directories.
os.makedirs("results_datasets", exist_ok=True)

TC_a2c_test_daily_return.to_csv('results_datasets/TC_a2c_test_daily_return.csv', index=False)
# NOTE(review): the weights file keeps its index, unlike the other two --
# presumably intentional; confirm.
TC_a2c_test_weights.to_csv('results_datasets/TC_a2c_test_weights.csv')
TC_a2c_test_transaction_costs.to_csv('results_datasets/TC_a2c_test_transaction_costs.csv', index=False)