In [6]:
import numpy as np
import pandas as pd
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import yfinance as yf

In [11]:
def get_asset_data(tickers, start, end):
    """Download adjusted closing prices from Yahoo Finance via yfinance.

    Args:
        tickers: Ticker symbol(s), forwarded unchanged to ``yf.download``.
        start: Start of the date range, forwarded to ``yf.download``.
        end: End of the date range, forwarded to ``yf.download``.

    Returns:
        The ``'Adj Close'`` selection of the downloaded price table.
    """
    # Request unadjusted OHLC plus a separate 'Adj Close' column explicitly.
    # Newer yfinance releases default to auto_adjust=True, in which case the
    # 'Adj Close' column is absent and the lookup below raises KeyError.
    prices = yf.download(tickers, start=start, end=end, auto_adjust=False)
    return prices['Adj Close']

# Asset universe handed to the downloader and to the environment.
tickers = [
    'AAPL',
    'TXNM',
    'MSFT',
    'JNJ',
    'CL',
    'NVDA',
]

# Date range of the price history to download.
start_date, end_date = '2020-01-01', '2023-01-01'

In [12]:
class PortfolioEnv(gym.Env):
    """Gym environment that replays a table of per-asset values row by row.

    Every observation is one row of ``data`` (one value per ticker) and every
    action is a vector of allocation scores in ``[0, 1]``, one per ticker.
    The scores are normalised to portfolio weights before the reward is
    computed.

    NOTE(review): the reward is only a portfolio return if the rows of
    ``data`` are per-period asset returns (the notebook passes the output of
    ``pct_change``) and the columns are in the same order as ``tickers``.
    """

    def __init__(self, data, tickers):
        """Store the data table and declare the action/observation spaces.

        Args:
            data: Table supporting ``len`` and ``.iloc`` row access, with one
                column per ticker.
            tickers: Sequence of asset identifiers; its length fixes the size
                of the action and observation vectors.

        Raises:
            ValueError: If ``data`` has no rows.
        """
        super().__init__()
        if len(data) == 0:
            raise ValueError("data must contain at least one row")
        self.data = data
        self.tickers = tickers
        self.current_step = 0
        n_assets = len(tickers)
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(n_assets,), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(n_assets,), dtype=np.float32)

    def _observation(self):
        """Return the current row as float32, matching ``observation_space``."""
        return self.data.iloc[self.current_step].to_numpy(dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return it as the initial observation."""
        self.current_step = 0
        return self._observation()

    def step(self, action):
        """Advance one row and reward ``action`` with that row's values.

        Args:
            action: Allocation scores, one per ticker.

        Returns:
            Tuple ``(obs, reward, done, info)`` in the classic gym step format.
        """
        last_index = len(self.data) - 1
        # Clamp instead of wrapping to row 0: the episode ends ON the last
        # row, so the final reward and observation come from that row rather
        # than from the start of the table. Clamping also keeps a stray
        # step() after `done` from indexing past the end.
        self.current_step = min(self.current_step + 1, last_index)
        done = self.current_step >= last_index

        obs = self._observation()
        reward = self.calculate_reward(action)
        return obs, reward, done, {}

    def calculate_reward(self, action):
        """Return the weighted sum of the current row under ``action``.

        The action is clipped to ``[0, 1]`` and rescaled to sum to 1 so the
        reward reflects how capital is split between assets, not how large
        the raw scores are. An all-zero action falls back to equal weights.

        This is a simplified reward: it does not penalise risk.
        """
        scores = np.clip(np.asarray(action, dtype=np.float64).ravel(), 0.0, 1.0)
        total = scores.sum()
        if total > 0:
            weights = scores / total
        else:
            weights = np.full(scores.shape, 1.0 / scores.size)
        row = self.data.iloc[self.current_step].to_numpy(dtype=np.float64)
        return float(np.dot(weights, row))


In [14]:
# Fixed seed so PPO training, and therefore the reported weights, are repeatable.
SEED = 42

# Period-over-period returns, one column per asset. Columns are selected with
# `tickers` so that column i (and hence action component i) lines up with
# tickers[i] regardless of the column order the download returns; a ticker
# missing from the download fails loudly here with a KeyError.
data = (
    get_asset_data(tickers, start_date, end_date)[tickers]
    .pct_change()
    .dropna()
    .reset_index(drop=True)
)

env = DummyVecEnv([lambda: PortfolioEnv(data, tickers)])

model = PPO('MlpPolicy', env, verbose=1, seed=SEED)
model.learn(total_timesteps=10000)

# Query the trained policy on the first observation. deterministic=True asks
# for the policy's deterministic action instead of a random sample, so the
# recommendation does not change from call to call.
obs = env.reset()
action, _ = model.predict(obs, deterministic=True)

# Turn the raw action scores into portfolio weights that sum to 1; the return,
# risk and Sharpe figures below are only meaningful for normalised weights.
raw_scores = np.clip(np.asarray(action, dtype=np.float64).flatten(), 0.0, 1.0)
score_total = raw_scores.sum()
if score_total > 0:
    weights = raw_scores / score_total
else:
    # Degenerate all-zero action: fall back to an equal-weight portfolio.
    weights = np.full(len(tickers), 1.0 / len(tickers))

portfolio_df = pd.DataFrame({'Ticker': tickers, 'Weight': weights})

print("Recommended Portfolio Weights:")
print(portfolio_df)

risk_free_rate = 0.02 # assumed value

# Annualise with 252 periods per year (assumes one row per trading day).
# NOTE(review): these statistics are computed on the same data the agent was
# trained on, so they are in-sample figures.
expected_returns = data.mean() * 252
cov_matrix = data.cov() * 252

portfolio_return = float(np.dot(weights, expected_returns))
portfolio_risk = float(np.sqrt(weights @ cov_matrix.to_numpy() @ weights))

sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_risk

print(f"\nExpected Portfolio Return: {portfolio_return:.2%}")
print(f"Portfolio Risk (Volatility): {portfolio_risk:.2%}")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

[*********************100%***********************]  6 of 6 completed


Using cpu device
-----------------------------
| time/              |      |
|    fps             | 5976 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 3891        |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.004879192 |
|    clip_fraction        | 0.0161      |
|    clip_range           | 0.2         |
|    entropy_loss         | -8.55       |
|    explained_variance   | 0.00209     |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0341     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00458    |
|    std                  | 1.01        |
|    value_loss           | 0.00752     |
-----------------