Cell 1: Install dependencies

In [None]:
!pip install stable-baselines3 pandas gym matplotlib


Cell 2: Load data and define the environment

In [None]:
import gym
from gym import spaces
import pandas as pd
import numpy as np

# Build the modelling frame from the merged sentiment/price CSV in one pass:
#   1. parse 'date' and order the rows chronologically,
#   2. derive the close-to-close percentage return and a 3-row rolling mean
#      of the raw sentiment score,
#   3. drop every row that contains a NaN (this includes the leading rows for
#      which the derived columns are undefined) and renumber the index from 0.
df = (
    pd.read_csv('/content/merged_sentiment_price.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
    .assign(
        price_return=lambda frame: frame['close'].pct_change(),
        sentiment_smooth=lambda frame: frame['sentiment'].rolling(3).mean(),
    )
    .dropna()
    .reset_index(drop=True)
)

# Columns that make up the agent's observation vector, in order.
state_features = [
    'close',
    'price_return',
    'sentiment',
    'sentiment_smooth',
]

class TradingEnv(gym.Env):
    """Single-asset trading environment driven by a price/sentiment DataFrame.

    Every call to ``step`` advances one row. The observation is the current
    row's feature columns as a float32 vector. Actions are discrete:
    0 = Hold, 1 = Buy, 2 = Sell.

    Position rules (at most one open position at a time):
      * flat + Buy opens a long, flat + Sell opens a short, both at the
        current row's close;
      * long + Sell or short + Buy closes the position and pays its signed
        fractional return as the reward for that step;
      * a position still open when the episode ends is liquidated and its
        return is paid on the final step.
    Every other step yields a reward of 0.0.

    Args:
        df: DataFrame containing a 'close' column and every feature column.
            Its index is ignored; rows are addressed by position.
        features: Optional list of column names used as the observation.
            Defaults to the module-level ``state_features``.

    Raises:
        ValueError: if ``df`` has no rows.
    """

    HOLD, BUY, SELL = 0, 1, 2

    def __init__(self, df, features=None):
        super().__init__()
        if len(df) == 0:
            raise ValueError("TradingEnv requires a DataFrame with at least one row")

        self.features = list(state_features if features is None else features)
        # Renumber rows 0..n-1 so the step counter can address them directly,
        # whatever index the caller's frame carried.
        self.df = df.reset_index(drop=True)
        # Cache plain arrays once; per-step DataFrame lookups are far slower.
        self._observations = self.df[self.features].to_numpy(dtype=np.float32)
        self._closes = self.df['close'].to_numpy(dtype=np.float64)

        self.current_step = 0
        self.max_steps = len(self.df) - 1  # also the index of the last row
        self.action_space = spaces.Discrete(3)  # 0=Hold, 1=Buy, 2=Sell
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(len(self.features),),
            dtype=np.float32
        )
        self.position = 0  # 0=flat, 1=long, -1=short
        self.entry_price = 0

    def reset(self):
        """Rewind to the first row with no open position; return the first observation."""
        self.current_step = 0
        self.position = 0
        self.entry_price = 0
        return self._get_obs()

    def _get_obs(self):
        """Return a float32 copy of the feature vector for the current row."""
        # Clamp so a stray step() after the episode has ended (or a one-row
        # frame) repeats the last row instead of indexing out of bounds.
        row = min(self.current_step, self.max_steps)
        return self._observations[row].copy()

    def _close_position(self, price):
        """Flatten the open position at ``price`` and return its signed fractional return."""
        if self.entry_price == 0:
            # A zero entry price has no meaningful percentage return; avoid dividing by zero.
            realized = 0.0
        else:
            change = (price - self.entry_price) / self.entry_price
            realized = change if self.position == 1 else -change
        self.position = 0
        self.entry_price = 0
        return float(realized)

    def step(self, action):
        """Apply ``action`` at the current row's close and advance one row.

        Returns:
            (observation, reward, done, info) in the legacy 4-tuple gym format.
            ``reward`` is always a float; ``info`` is an empty dict.
        """
        # Accept plain ints as well as the size-1 numpy arrays a policy's predict() may return.
        action = int(np.asarray(action).item())
        # Trades execute at the close of the row the agent has just observed.
        price = float(self._closes[min(self.current_step, self.max_steps)])
        reward = 0.0

        if action == self.BUY:
            if self.position == 0:
                self.position = 1
                self.entry_price = price
            elif self.position == -1:
                reward += self._close_position(price)  # buy to cover the short
        elif action == self.SELL:
            if self.position == 0:
                self.position = -1
                self.entry_price = price
            elif self.position == 1:
                reward += self._close_position(price)  # sell out of the long
        # HOLD (or Buy while long / Sell while short) leaves the position untouched.

        self.current_step += 1
        done = self.current_step >= self.max_steps
        if done and self.position != 0:
            # Out of data: liquidate whatever is still open so it is reflected in the reward.
            reward += self._close_position(price)

        return self._get_obs(), reward, done, {}

    def render(self, mode='human'):
        """No-op; this environment has no visualisation."""
        pass  # Optional


Cell 3: Validate the environment and train the PPO agent

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Fixed seed so that a Restart & Run All reproduces the same training run;
# the value itself is arbitrary.
SEED = 42

# Build the environment over the prepared frame and let stable-baselines3
# verify that it follows the Gym interface before any training time is spent.
env = TradingEnv(df)
check_env(env)  # Check for Gym compliance

# Train a PPO agent with the default MLP policy for 10,000 environment steps.
model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
model.learn(total_timesteps=10_000)


Cell 4: Evaluate the policy

In [None]:
import matplotlib.pyplot as plt

# Roll the trained policy through one full pass over the data, collecting the
# per-step reward. The loop is bounded by the number of rows and also stops as
# soon as the environment reports the episode is over.
obs = env.reset()
rewards = []
for _step in range(len(df) - 1):
    # deterministic=True asks the policy for a deterministic action instead of
    # sampling one, so the evaluation is repeatable from run to run.
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    rewards.append(reward)
    if done:
        break

# Running total of reward across the episode.
fig, ax = plt.subplots()
ax.plot(np.cumsum(rewards))
ax.set_title("Cumulative Reward Over Time")
ax.set_xlabel("Step")
ax.set_ylabel("Cumulative Reward")
plt.show()


Cell 5: Save the agent

In [None]:
# Persist the trained agent to disk under the base name "ppo_trading_agent"
# (the download cell expects the resulting file to be ppo_trading_agent.zip).
model.save("ppo_trading_agent")
# The original message was mojibake: the UTF-8 bytes of the check-mark emoji
# had been decoded as cp1252. Restored the intended character.
print("✅ PPO model saved.")


Cell 6: Download the saved model

In [None]:
# Colab-only step: `google.colab` is importable only inside a Google Colab
# runtime, so this cell will raise ImportError anywhere else.
from google.colab import files
# Hand the saved model archive to the browser as a file download.
files.download("ppo_trading_agent.zip")
