## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mplfinance as mpf

import quantstats as qs

from src.env.env import CustomStockEnv
import warnings

# Suppress every warning category for the rest of the session to keep the
# notebook output clean. NOTE(review): this also hides deprecation and
# numerical warnings raised by the libraries used below.
warnings.simplefilter(action='ignore')

## Constants

In [2]:
# Starting capital handed to the MACD strategy, the backtest and both
# Q-learning agents as their initial investment (presumably USD, matching the
# "$" axis labels used in the plots below).
INITIAL_INVESTMENT = 30_000
# Number of training episodes passed to each Q-learning agent's learn() call.
NUMBER_OF_EPISODES = 10_000

## Extract historical data

In [None]:
# Build the trading environment for the study window and keep a handle on the
# price data it exposes; later cells reuse both `env` and `data`.
env = CustomStockEnv.build_from_symbol('2019-01-01', '2024-10-31')
data = env.data

# Line chart of the closing price over the whole window.
close_plot_options = {
    'figsize': (14, 7),
    'title': 'The Coca-Cola Company Asset Close Historical Prices',
    'ylabel': 'Asset Value ($)',
    'xlabel': 'Date',
    'color': plt.cm.cividis(0.3),
}
data['Close'].plot(**close_plot_options)
plt.show()

## Calculate returns, expected mean returns and risk

In [None]:
## period-over-period percentage change of every column in the price data
returns = data.pct_change()

## expected (mean) return and volatility (standard deviation) per column
mean_return, volatility = returns.mean(), returns.std()

## report the closing-price figures, rounded to 5 decimals
close_mean_return = round(mean_return['Close'], 5)
close_volatility = round(volatility['Close'], 5)
print(f"Mean Return: {close_mean_return}")
print(f"Volatility (Risk): {close_volatility}")

## Define statistical strategy

### MACD Strategy

In [5]:
from src.utils.macd_strategy import MACDStrategy

# The strategy gets its own environment instance covering the same date range
# as the one built earlier.
strategy_env = CustomStockEnv.build_from_symbol(
    start_date="2019-01-01",
    end_date="2024-10-31",
)
strategy = MACDStrategy(strategy_env)

# Run the strategy with the configured starting capital.
strategy.apply_strategy(INITIAL_INVESTMENT)

# From here on `data` refers to the strategy's data frame; the cells below read
# its 'Close', 'Histogram' and 'Portfolio' columns.
data = strategy.data

## Strategy results

In [None]:
# Two shades from the cividis colour map: one per plotted series.
color1 = plt.cm.cividis(0.3)
color2 = plt.cm.cividis(0.8)

fig, ax1 = plt.subplots(figsize=(14, 7))

# Plot the close prices on the primary (left) y-axis
ax1.plot(data.index, data['Close'], label='Close Price', color=color1)
ax1.set_xlabel('Date')
ax1.set_ylabel('Close Price ($)', color=color1)
ax1.tick_params(axis='y', labelcolor='black')

# Create a second y-axis to plot the histogram, which lives on a different scale
ax2 = ax1.twinx()
ax2.plot(data.index, data['Histogram'], label='MACD Histogram', color=color2)
# Label the secondary axis so the right-hand scale is identifiable
ax2.set_ylabel('MACD Histogram', color=color2)
ax2.tick_params(axis='y', labelcolor='black')

fig.suptitle('Close Prices and MACD Histogram')

# Twinned axes each draw their own legend and the two boxes can land on top of
# each other. Merge the entries into one legend, drawn on the top-most axes.
close_handles, close_labels = ax1.get_legend_handles_labels()
hist_handles, hist_labels = ax2.get_legend_handles_labels()
ax2.legend(close_handles + hist_handles, close_labels + hist_labels)

fig.tight_layout()
plt.show()

In [None]:
# Rescale the close price so its maximum matches the portfolio's maximum,
# letting both curves share one y-axis.
close_to_portfolio_ratio = data['Portfolio'].max() / data['Close'].max()
scaled_close = data['Close'] * close_to_portfolio_ratio

# Draw both series with shades from the cividis colour map
portfolio_fig, portfolio_ax = plt.subplots(figsize=(14, 7))
portfolio_ax.plot(data.index, scaled_close, label='Scaled Close Price', color=plt.cm.cividis(0.8))
portfolio_ax.plot(data.index, data['Portfolio'], label='Portfolio', color=plt.cm.cividis(0.3))
portfolio_ax.legend()
portfolio_ax.set_title('MACD Strategy')
portfolio_ax.set_xlabel('Date')
portfolio_ax.set_ylabel('Portfolio balance ($)')
plt.show()

In [None]:
from src.utils.backtest import Backtest

# Evaluate the MACD strategy against the same starting capital it was run with.
# Kept as the cell's last expression so any returned report is displayed.
Backtest.evaluate(strategy, initial_investment=INITIAL_INVESTMENT)

## Agent

### Q-Learning agent

In [None]:
from src.agent.q_learning_agent import QLearningAgent

# Baseline agent: no hyperparameters are passed, so the class defaults apply.
# It trades in `env` and is given the strategy's entry points and risk value.
agent = QLearningAgent(
    env=env,
    entry_points=strategy.entry_points,
    symbol_risk_free_rate=strategy.risk,
)

# Train the baseline agent; the returned portfolio is reused by the evaluation
# and comparison cells below.
total_reward, portfolio, trades = agent.learn(
    initial_investment=INITIAL_INVESTMENT,
    num_episodes=NUMBER_OF_EPISODES,
    verbose=True,
)

## Agent Results

In [None]:
# Evaluate the portfolio produced by the default Q-learning agent's training run.
Backtest.model_evaluation(portfolio)

### Model fine-tuning

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

from src.env.q_learning_tuning_env import QLearningTuningEnv

# Create the meta-environment: a single tuning env wrapped in a vectorised env,
# as required by stable-baselines3.
meta_env = DummyVecEnv([lambda: QLearningTuningEnv(strategy=strategy, max_episodes=5)])

# Create and train the PPO agent
ppo_agent = PPO(
    "MlpPolicy",
    meta_env,
    n_steps=256,      # Reduced rollout length to prevent memory issues
    verbose=1
)
# NOTE(review): total_timesteps (50) is smaller than n_steps (256); PPO collects
# whole rollouts, so this presumably trains on one 256-step rollout — confirm
# this is the intended training budget.
ppo_agent.learn(total_timesteps=50)

# Get the best hyperparameters from the trained PPO agent.
# Query the policy from a real initial observation of the environment rather
# than a random sample of the observation space, which may not correspond to
# any state the environment can produce and makes the chosen action arbitrary.
obs = meta_env.reset()
action, _ = ppo_agent.predict(obs, deterministic=True)

# The vectorised env yields one action per wrapped env; take the single env's
# action as a plain int so it can index the list of combinations.
# Assumes a discrete action space whose index selects an entry of
# `action_combinations` — TODO confirm against QLearningTuningEnv.
action_index = int(np.asarray(action).reshape(-1)[0])

# Extract hyperparameters using the action index
best_hyperparameters = meta_env.envs[0].action_combinations[action_index]
alpha, gamma, epsilon_min, epsilon_decay, lambda_min, lambda_decay = best_hyperparameters

print("Best Hyperparameters Found:")
print(
    f"Learning Rate: {round(alpha, 2)}, Gamma: {round(gamma, 2)}, "
    f"Minimum Epsilon: {round(epsilon_min, 2)}, Epsilon Decay: {round(epsilon_decay, 3)}, "
    f"Minimum Lambda: {round(lambda_min, 2)}, Lambda Decay: {round(lambda_decay, 3)}"
)

### Fine-tuned Agent results

In [None]:
# Hyperparameters selected by the PPO tuning step, grouped for readability.
tuned_hyperparameters = {
    'alpha': alpha,
    'gamma': gamma,
    'epsilon_min': epsilon_min,
    'epsilon_decay': epsilon_decay,
    'lambda_min': lambda_min,
    'lambda_decay': lambda_decay,
}

# Same environment, entry points and risk value as the default agent; only the
# hyperparameters differ.
fine_tuned_agent = QLearningAgent(
    env=env,
    entry_points=strategy.entry_points,
    symbol_risk_free_rate=strategy.risk,
    **tuned_hyperparameters,
)

# Train with the same capital and episode budget as the default agent so the
# two runs are comparable.
ft_total_reward, ft_portfolio, ft_trades = fine_tuned_agent.learn(
    initial_investment=INITIAL_INVESTMENT,
    num_episodes=NUMBER_OF_EPISODES,
    verbose=True,
)

## Results

In [None]:
# Compare the strategy's portfolio with both agents' portfolios on one chart,
# each in its own shade of the cividis colour map. The agents' series skip
# their first element so they are plotted against the same date index.
comparison_fig, comparison_ax = plt.subplots(figsize=(14, 7))
comparison_series = (
    (data['Portfolio'], 'Strategy', 0.8),
    (portfolio[1:], 'Default agent', 0.3),
    (ft_portfolio[1:], 'Fine-tuned agent', 0.6),
)
for balances, series_label, shade in comparison_series:
    comparison_ax.plot(data.index, balances, label=series_label, color=plt.cm.cividis(shade))
comparison_ax.legend()
comparison_ax.set_title('Agent Comparison vs. Strategy')
comparison_ax.set_xlabel('Date')
comparison_ax.set_ylabel('Portfolio balance ($)')
plt.show()