# Notebook 04 – FinRL Trading Environment
## Multimodal Reinforcement Learning Environment for EGX

This notebook builds a **FinRL-compatible trading environment** for the Egyptian Exchange (EGX) that integrates:
- Price-based technical indicators
- Financial news sentiment
- Social media sentiment

Stocks:
- COMI
- AMOC
- SWDY


In [3]:
# Mount Google Drive at /content/drive (Colab-only module) so the project
# files stored under that mount point can be read and written by later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Install required libraries
!pip install finrl stable-baselines3 gymnasium pandas numpy

Collecting finrl
  Downloading FinRL-0.3.7-py3-none-any.whl.metadata (909 bytes)
Collecting stable-baselines3
  Downloading stable_baselines3-2.7.1-py3-none-any.whl.metadata (4.8 kB)
Downloading FinRL-0.3.7-py3-none-any.whl (127 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.2/127.2 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading stable_baselines3-2.7.1-py3-none-any.whl (188 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.0/188.0 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: finrl, stable-baselines3
Successfully installed finrl-0.3.7 stable-baselines3-2.7.1


In [5]:
import os
import pandas as pd
import numpy as np


In [6]:
# Base directories
# Project root on the mounted Google Drive.
BASE_DIR = '/content/drive/MyDrive/finrl-egx-multimodal'
# Shared data root; every input folder below hangs off it.
DATA_DIR = os.path.join(BASE_DIR, 'data')

# One folder per modality. Path components are passed separately to
# os.path.join (no embedded '/'), matching how the output path is built later.
PRICE_DIR = os.path.join(DATA_DIR, 'stocks_processed')
NEWS_DIR = os.path.join(DATA_DIR, 'news')
SENTIMENT_DIR = os.path.join(DATA_DIR, 'sentiment')

In [7]:
# Load and merge multimodal data
def load_multimodal_data(stock):
    """Load one ticker's price table and attach daily news and social sentiment.

    Reads ``{stock}_processed.csv`` from PRICE_DIR, ``{stock}_news.csv`` from
    NEWS_DIR and ``{stock}_sentiment.csv`` from SENTIMENT_DIR, then left-joins
    both sentiment sources onto the price rows by date.

    NOTE: a function with this same name is defined again in a later cell of
    this notebook; the later definition is the one in effect after Run All.

    Parameters
    ----------
    stock : str
        Ticker symbol used as the file-name prefix (e.g. 'COMI').

    Returns
    -------
    pandas.DataFrame
        Every price row, with ``sentiment_news`` (mean of that day's news
        sentiment) and ``sentiment_social`` appended. Dates with no matching
        sentiment row get 0.
    """
    price_df = pd.read_csv(os.path.join(PRICE_DIR, f'{stock}_processed.csv'))
    news_df = pd.read_csv(os.path.join(NEWS_DIR, f'{stock}_news.csv'))
    sentiment_df = pd.read_csv(os.path.join(SENTIMENT_DIR, f'{stock}_sentiment.csv'))

    # Parse the join keys so all three tables match on real timestamps
    price_df['Date'] = pd.to_datetime(price_df['Date'])
    news_df['date'] = pd.to_datetime(news_df['date'])
    sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])

    # Aggregate daily news sentiment (several news rows may share a date)
    news_agg = news_df.groupby('date')['sentiment'].mean().reset_index()

    # Merge all data. After the first merge the frame carries both 'Date' and
    # 'date'; in the second merge the clashing 'date' and 'sentiment' columns
    # are disambiguated by the suffixes into *_news / *_social.
    df = price_df.merge(news_agg, left_on='Date', right_on='date', how='left')
    df = df.merge(sentiment_df, left_on='Date', right_on='date', how='left', suffixes=('_news', '_social'))

    # Reassign rather than calling fillna(inplace=True) on a column selection:
    # the chained in-place form is deprecated and does not update df under
    # pandas Copy-on-Write.
    df['sentiment_news'] = df['sentiment_news'].fillna(0)
    df['sentiment_social'] = df['sentiment_social'].fillna(0)

    # The redundant key columns are named by the suffixes above
    # ('date_news' / 'date_social'), not pandas' default '_x' / '_y'.
    return df.drop(columns=['date_news', 'date_social'])

In [8]:
import os
import pandas as pd
import numpy as np

# Assuming BASE_DIR, PRICE_DIR, NEWS_DIR, SENTIMENT_DIR are already defined from previous cells
# (If not, they would need to be re-defined here as well for this cell to be fully self-contained)

# Load and merge multimodal data (corrected function definition)
def load_multimodal_data(stock):
    """Build one ticker's table of prices plus news and social sentiment.

    Three CSVs named after ``stock`` are read from PRICE_DIR, NEWS_DIR and
    SENTIMENT_DIR. News sentiment is averaged per date, both sentiment sources
    are left-joined onto the price rows, and dates without a sentiment match
    are filled with 0.

    Parameters
    ----------
    stock : str
        Ticker symbol used as the file-name prefix (e.g. 'COMI').

    Returns
    -------
    pandas.DataFrame
        The price columns followed by ``sentiment_news`` and
        ``sentiment_social``; the helper join-key columns are removed.
    """
    prices = pd.read_csv(os.path.join(PRICE_DIR, f'{stock}_processed.csv'))
    news = pd.read_csv(os.path.join(NEWS_DIR, f'{stock}_news.csv'))
    social = pd.read_csv(os.path.join(SENTIMENT_DIR, f'{stock}_sentiment.csv'))

    # Join keys become real timestamps in all three tables
    prices['Date'] = pd.to_datetime(prices['Date'])
    news['date'] = pd.to_datetime(news['date'])
    social['date'] = pd.to_datetime(social['date'])

    # One averaged news-sentiment value per date
    daily_news = news.groupby('date', as_index=False)['sentiment'].mean()

    # Two chained left joins keyed on the price date. In the second join the
    # left frame already holds 'date' and 'sentiment' from the news table, so
    # the suffixes split the clashes into *_news (left) and *_social (right).
    merged = (
        prices
        .merge(daily_news, how='left', left_on='Date', right_on='date')
        .merge(social, how='left', left_on='Date', right_on='date',
               suffixes=('_news', '_social'))
    )

    # Dates with no sentiment match are filled with 0
    for column in ('sentiment_news', 'sentiment_social'):
        merged[column] = merged[column].fillna(0)

    # 'Date' is the only key we keep; the suffixed copies are redundant
    return merged.drop(columns=['date_news', 'date_social'])

# Build final dataset for FinRL (original code from this cell)
# Tickers to include in the combined dataset
stocks = ['COMI', 'AMOC', 'SWDY']

# Load each ticker's merged table and tag its rows with the ticker symbol
all_data = []
for stock in stocks:
    df = load_multimodal_data(stock).assign(tic=stock)
    all_data.append(df)

# Stack the per-ticker tables and order rows by date, then ticker.
# Each table's own row labels are kept, so index values repeat across tickers.
final_df = pd.concat(all_data).sort_values(['Date', 'tic'])

final_df.head()


Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume,daily_return,rsi,sma_20,sma_50,volatility,sentiment_news,sentiment_social,tic
0,2020-03-15,1.230391,1.899999976158142,2.059999942779541,1.850000023841858,2.180000066757202,2335434,-0.12844,13.470857,1.802846,2.059285,0.042764,0.0,-0.314831,AMOC
0,2020-03-15,28.729263,31.47247314453125,34.14461898803711,31.447547912597656,34.937286376953125,17661326,-0.100841,15.340648,36.029904,37.541819,0.031654,0.0,-0.145865,COMI
0,2020-03-15,6.516446,7.809999942779541,8.380000114440918,7.769999980926514,8.630000114440918,2740164,-0.072446,23.45679,7.719194,8.512932,0.033478,0.0,-0.09956,SWDY
1,2020-03-16,1.133255,1.75,1.899999976158142,1.7100000381469729,1.899999976158142,2396915,-0.078947,12.021511,1.754926,2.034807,0.044315,0.0,-0.696901,AMOC
1,2020-03-16,26.80427,29.363672256469727,31.25810432434082,28.34167861938477,31.47247314453125,5148437,-0.067005,12.860626,35.425101,37.32156,0.033519,0.078292,0.078292,COMI


In [9]:
# Save merged dataset
# The CSV is written under BASE_DIR/data; index=False leaves the row index
# out of the file.
output_path = os.path.join(BASE_DIR, 'data', 'multimodal_finrl_data.csv')
final_df.to_csv(output_path, index=False)

# Echo the destination so the run log records where the file was written
print(f'Multimodal dataset saved to: {output_path}')

Multimodal dataset saved to: /content/drive/MyDrive/finrl-egx-multimodal/data/multimodal_finrl_data.csv
