In [3]:
import gym
from gym import spaces
import numpy as np
import logging

# Setting up logging
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger()

# Define the Portfolio Environment
class PortfolioEnv(gym.Env):
    def __init__(self, data: np.ndarray, initial_cash: float = 10000.0):
        """
        Initialize the portfolio environment.
        :param data: Historical asset price data (numpy array).
        :param initial_cash: Starting capital.
        """
        super(PortfolioEnv, self).__init__()
        
        self.data = data
        self.initial_cash = initial_cash
        self.num_assets = data.shape[1]
        
        # Define state space and action space
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(self.num_assets + 2,))
        self.action_space = spaces.Box(low=0, high=1, shape=(self.num_assets,))
        
        self.reset()

    def reset(self):
        """
        Reset the environment to the initial state.
        """
        self.current_step = 0
        self.cash = self.initial_cash
        self.portfolio = np.zeros(self.num_assets)
        logger.info(f"Environment reset. Initial cash: {self.cash}, Portfolio: {self.portfolio}")
        return self._get_observation()

    def step(self, action: np.ndarray):
        """
        Take an action in the environment.
        :param action: Allocation proportions for each asset.
        """
        logger.info(f"Step {self.current_step}: Taking action: {action}")
        
        # Normalize action to ensure it sums to 1
        action = action / np.sum(action)
        
        # Calculate portfolio rebalancing
        current_prices = self.data[self.current_step]
        portfolio_value = np.dot(self.portfolio, current_prices) + self.cash
        new_portfolio = portfolio_value * action / current_prices
        transaction_costs = np.sum(np.abs(new_portfolio - self.portfolio)) * 0.001  # 0.1% fee
        reward = portfolio_value - transaction_costs - self.cash
        
        # Update state
        self.cash = portfolio_value - np.sum(new_portfolio * current_prices)
        self.portfolio = new_portfolio
        logger.info(f"New portfolio: {self.portfolio}, Remaining cash: {self.cash}, Reward: {reward}")

        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """
        Generate the current observation.
        """
        current_prices = self.data[self.current_step]
        portfolio_value = np.dot(self.portfolio, current_prices)
        return np.concatenate([current_prices, [portfolio_value, self.cash]])


In [4]:
# Simulating Sample Data
np.random.seed(42)
sample_data = np.random.uniform(low=50, high=150, size=(10, 5))  # 5 assets, 10 timesteps

# Running the environment
env = PortfolioEnv(data=sample_data, initial_cash=10000.0)
state = env.reset()

for _ in range(len(sample_data)):
    # Sample a random action (proportions for each asset)
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    if done:
        logger.info("End of the episode reached.")
        break

Environment reset. Initial cash: 10000.0, Portfolio: [0. 0. 0. 0. 0.]
Environment reset. Initial cash: 10000.0, Portfolio: [0. 0. 0. 0. 0.]
Step 0: Taking action: [0.688157   0.26861453 0.6727024  0.09545094 0.02135996]
New portfolio: [45.06016206 10.60309212 31.26794487  4.9751072   1.86452892], Remaining cash: -0.0005029141902923584, Reward: -0.0937708351702895
Step 1: Taking action: [0.12593919 0.9422795  0.10287945 0.21460946 0.31195104]
New portfolio: [ 9.71692433 85.45719202  3.8114488   9.86471044 13.06957754], Remaining cash: 0.0001280379274248844, Reward: 8592.326575664405
Step 2: Taking action: [0.5267901 0.7105647 0.370498  0.9189529 0.8909605]
New portfolio: [44.91172803 21.4548904  12.34101065 57.25584655 57.99612226], Remaining cash: -0.0005650876846630126, Reward: 15168.756841828805
Step 3: Taking action: [0.33872604 0.27471083 0.5177382  0.3314631  0.7941609 ]
New portfolio: [35.10495131 24.19282305 35.78386179 25.1908329  71.08924047], Remaining cash: -0.00107182315332