# Notebook 06 – Evaluation & Results
## Performance Evaluation of PPO Multimodal Trading Agent (EGX)

This notebook evaluates the trained PPO agent on **out-of-sample test data** and reports
standard financial performance metrics used in Chapter 6 of the thesis.


In [5]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Install required libraries
!pip install finrl stable-baselines3 gymnasium pandas numpy matplotlib



In [7]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Install missing dependencies
!pip install alpaca-trade-api
!pip install exchange-calendars

from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from stable_baselines3 import PPO



In [8]:
# Load dataset
BASE_DIR = '/content/drive/MyDrive/finrl-egx-multimodal'
DATA_PATH = os.path.join(BASE_DIR, 'data', 'multimodal_finrl_data.csv')
MODEL_PATH = os.path.join(BASE_DIR, 'results', 'ppo_multimodal_model.zip')

df = pd.read_csv(DATA_PATH)
df['Date'] = pd.to_datetime(df['Date'])


  return datetime.utcnow().replace(tzinfo=utc)


In [9]:
# Define technical indicators
tech_indicators = [
    'rsi',
    'sma_20',
    'sma_50',
    'volatility',
    'sentiment_news',
    'sentiment_social'
]

In [22]:
# Test dataset
test_df = df[df['Date'] >= '2024-01-01'].copy()

# Rename columns to FinRL's expected format (lowercase)
test_df.columns = [
    'date' if col == 'Date' else
    'adjcp' if col == 'Adj Close' else
    'close' if col == 'Close' else
    'high' if col == 'High' else
    'low' if col == 'Low' else
    'open' if col == 'Open' else
    'volume' if col == 'Volume' else
    col
    for col in test_df.columns
]

# Ensure data types are numeric for relevant columns
numeric_cols = ['adjcp', 'close', 'high', 'low', 'open', 'volume'] + tech_indicators
for col in numeric_cols:
    if col in test_df.columns:
        test_df[col] = pd.to_numeric(test_df[col], errors='coerce')
# Drop rows where critical numeric columns became NaN (if any non-numeric values were present)
test_df.dropna(subset=['close'], inplace=True);

# Ensure data is sorted by Date and tic, then create a numerical index for unique trading days
test_df = test_df.sort_values(['date', 'tic']).reset_index(drop=True)
test_df.index = test_df.date.factorize()[0]

stock_dimension = len(test_df['tic'].unique())

env_test = StockTradingEnv(
    df=test_df,
    stock_dim=stock_dimension,
    hmax=100,
    initial_amount=1_000_000,
    buy_cost_pct=[0.001] * stock_dimension,
    sell_cost_pct=[0.001] * stock_dimension,
    reward_scaling=1e-4,
    tech_indicator_list=tech_indicators,
    num_stock_shares=[0] * stock_dimension, # Changed to list of zeros
    state_space=1 + 2 * stock_dimension + len(tech_indicators) * stock_dimension, # cash + shares holding + price + technical indicators
    action_space=stock_dimension
)

  return datetime.utcnow().replace(tzinfo=utc)


In [24]:
# Load trained PPO model
model = PPO.load(MODEL_PATH, env=env_test)

state, _ = env_test.reset() # Unpack the tuple to get only the observation
portfolio_values = []

for _ in range(len(test_df['date'].unique()) - 1):
    action, _ = model.predict(state)
    observation, reward, terminated, truncated, info = env_test.step(action)
    state = observation # Update the state for the next iteration
    done = terminated or truncated # Combine terminated and truncated for loop control
    portfolio_values.append(env_test.asset_memory[-1])
    if done:
        break

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [25]:
# Convert portfolio values to Series
portfolio_values = pd.Series(portfolio_values)

# Compute returns
returns = portfolio_values.pct_change().dropna()

cumulative_return = (portfolio_values.iloc[-1] / portfolio_values.iloc[0] - 1) * 100
annualized_return = returns.mean() * 252 * 100
sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252)

# Maximum drawdown
rolling_max = portfolio_values.cummax()
drawdown = portfolio_values / rolling_max - 1
max_drawdown = drawdown.min() * 100

cumulative_return, annualized_return, sharpe_ratio, max_drawdown

(np.float64(-99.07753448661275),
 np.float64(-37.91821308245165),
 np.float64(-0.5133010125317101),
 -99.35386792143787)

In [30]:
# Plot equity curve
plt.figure()
plt.plot(portfolio_values)
plt.title('Equity Curve – PPO Multimodal Agent')
plt.xlabel('Trading Days')
plt.ylabel('Portfolio Value')
plt.show()

In [28]:
# Save evaluation metrics
metrics = pd.DataFrame({
    'Metric': ['Cumulative Return (%)', 'Annualized Return (%)', 'Sharpe Ratio', 'Max Drawdown (%)'],
    'Value': [cumulative_return, annualized_return, sharpe_ratio, max_drawdown]
})

metrics_path = os.path.join(BASE_DIR, 'results', 'evaluation_metrics.csv')
metrics.to_csv(metrics_path, index=False)

metrics

Unnamed: 0,Metric,Value
0,Cumulative Return (%),-99.077534
1,Annualized Return (%),-37.918213
2,Sharpe Ratio,-0.513301
3,Max Drawdown (%),-99.353868


  return datetime.utcnow().replace(tzinfo=utc)
