In [1]:
!pip install stable-baselines3 gym yfinance transformers textblob newsapi-python praw asyncpraw
!pip install torch torchvision torchaudio  # Ensure PyTorch is installed
!pip install scikit-learn

Collecting yfinance
  Downloading yfinance-0.2.51-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting transformers
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m361.3 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting textblob
  Downloading textblob-0.18.0.post0-py3-none-any.whl.metadata (4.5 kB)
Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting asyncpraw
  Downloading asyncpraw-7.8.1-py3-none-any.whl.metadata (9.0 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.17.8.tar.gz (948 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m948.2/948.2 kB[0m [31m895.9 kB/s[0m eta [36m0:00:00[0

In [2]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel
from textblob import TextBlob
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import DummyVecEnv
from gym import Env
from gym.spaces import Discrete, Box
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
import time
from datetime import datetime

In [18]:
# Path to your merged CSV file
file_path = 'merged_stock_news_data.csv'

# Load the merged data
merged_data = pd.read_csv(file_path)

# Display the first few rows
print("Merged Data:")
print(merged_data.head())

Merged Data:
         Date Symbol  Adj Close      Close       High        Low       Open  \
0  04/01/2010    MMM  44.016720  69.414719  69.774246  69.122070  69.473244   
1  05/01/2010    MMM  43.741032  68.979935  69.590302  68.311035  69.230766   
2  06/01/2010    MMM  44.361347  69.958191  70.735786  69.824417  70.133781   
3  07/01/2010    MMM  44.393150  70.008362  70.033447  68.662209  69.665550   
4  08/01/2010    MMM  44.705982  70.501671  70.501671  69.648827  69.974915   

      Volume                                            content  sentiment  
0  3640265.0  r/Stocks Daily Discussion & Fundamentals Frida...   0.000000  
1  3405012.0  ASML is this weird green color today, that can...  -0.138095  
2  6301126.0                 Even AMD is a little bit up today!  -0.234375  
3  5346240.0  I‚Äôm thankful for one of the strongest runs i...   0.000000  
4  4073337.0  [From September 2021](https://i.imgur.com/nCzn...   0.209855  


In [19]:
# Convert 'Date' to datetime without specifying the format
merged_data['Date'] = pd.to_datetime(merged_data['Date'], infer_datetime_format=True, dayfirst=True, errors='coerce')

# Check for any parsing failures
num_failed = merged_data['Date'].isnull().sum()
print(f"\nNumber of dates failed to parse: {num_failed}")

if num_failed > 0:
    print("Some dates couldn't be parsed. Please check the data for inconsistencies.")
    # Optionally, inspect the rows with NaT
    failed_dates = merged_data[merged_data['Date'].isnull()]
    print(failed_dates[['Date']])
    # Decide on an imputation strategy. For simplicity, we'll drop these rows.
    merged_data = merged_data.dropna(subset=['Date']).reset_index(drop=True)
    print(f"Dropped rows with unparsable dates. New data size: {merged_data.shape}")


Number of dates failed to parse: 0


  merged_data['Date'] = pd.to_datetime(merged_data['Date'], infer_datetime_format=True, dayfirst=True, errors='coerce')


In [20]:
def compute_rsi(prices, period=14):
    delta = prices.diff()
    gain = (delta.where(delta > 0, 0)).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

merged_data['SMA'] = merged_data['Close'].rolling(window=10).mean()
merged_data['RSI'] = compute_rsi(merged_data['Close'])
merged_data[['SMA', 'RSI']] = merged_data[['SMA', 'RSI']].fillna(method='bfill').fillna(0)

  merged_data[['SMA', 'RSI']] = merged_data[['SMA', 'RSI']].fillna(method='bfill').fillna(0)


In [21]:
missing_sentiment = merged_data['sentiment'].isnull().sum()
print(f"\nMissing Sentiment Values: {missing_sentiment}")
if missing_sentiment > 0:
    print("Filling missing sentiment values with 0 (neutral sentiment).")
    merged_data['sentiment'].fillna(0, inplace=True)


Missing Sentiment Values: 1
Filling missing sentiment values with 0 (neutral sentiment).


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_data['sentiment'].fillna(0, inplace=True)


In [22]:
merged_data = merged_data.sort_values('Date').reset_index(drop=True)
print("\nData after Sorting:")
print(merged_data[['Date', 'Close']].head())


Data after Sorting:
        Date      Close
0 2010-01-04  69.414719
1 2010-01-04  14.300000
2 2010-01-04  69.190002
3 2010-01-04  35.450001
4 2010-01-04   7.643214


In [23]:
class StockTransformer(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=2):
        super(StockTransformer, self).__init__()
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=4)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.input_fc = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        """
        x: Tensor of shape (batch_size, seq_len, input_size)
        """
        x = self.input_fc(x)          # (batch_size, seq_len, hidden_size)
        x = self.relu(x)
        x = x.permute(1, 0, 2)        # (seq_len, batch_size, hidden_size)
        x = self.transformer_encoder(x)  # (seq_len, batch_size, hidden_size)
        x = x.permute(1, 0, 2)        # (batch_size, seq_len, hidden_size)
        return x


In [24]:
class NewsTransformer(nn.Module):
    def __init__(self, pretrained_model='bert-base-uncased', hidden_size=128):
        super(NewsTransformer, self).__init__()
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_model)
        self.bert = BertModel.from_pretrained(pretrained_model)
        self.fc = nn.Linear(self.bert.config.hidden_size, hidden_size)
        self.relu = nn.ReLU()
    
    def forward(self, texts):
        """
        texts: List of strings
        Returns:
            embeddings: Tensor of shape (batch_size, hidden_size)
        """
        # Tokenize input texts
        encoding = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=128)
        input_ids = encoding['input_ids'].to(self.bert.device)
        attention_mask = encoding['attention_mask'].to(self.bert.device)
        
        # Get BERT outputs
        with torch.no_grad():  # Freeze BERT parameters
            outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_embeddings = outputs.last_hidden_state[:, 0, :]  # CLS token
        
        # Pass through a fully connected layer
        embeddings = self.fc(cls_embeddings)
        embeddings = self.relu(embeddings)
        return embeddings


In [25]:
class CombinedTransformer(nn.Module):
    def __init__(self, stock_input_size, stock_hidden_size, news_hidden_size, combined_size=256):
        super(CombinedTransformer, self).__init__()
        self.stock_transformer = StockTransformer(input_size=stock_input_size, hidden_size=stock_hidden_size)
        self.news_transformer = NewsTransformer(hidden_size=news_hidden_size)
        self.fc = nn.Linear(stock_hidden_size + news_hidden_size, combined_size)
        self.relu = nn.ReLU()
    
    def forward(self, stock_data, news_texts):
        """
        stock_data: Tensor of shape (batch_size, seq_len, stock_input_size)
        news_texts: List of strings
        Returns:
            combined_embeddings: Tensor of shape (batch_size, combined_size)
        """
        stock_embeddings = self.stock_transformer(stock_data)          # (batch_size, seq_len, stock_hidden_size)
        stock_embeddings = stock_embeddings.mean(dim=1)                # (batch_size, stock_hidden_size)
        news_embeddings = self.news_transformer(news_texts)            # (batch_size, news_hidden_size)
        combined = torch.cat((stock_embeddings, news_embeddings), dim=1)  # (batch_size, stock_hidden_size + news_hidden_size)
        combined = self.relu(self.fc(combined))                         # (batch_size, combined_size)
        return combined


In [26]:
class DecisionTransformerModel(nn.Module):
    def __init__(self, combined_size, action_size, hidden_size=256):
        super(DecisionTransformerModel, self).__init__()
        self.fc = nn.Linear(combined_size, hidden_size)
        self.transformer = nn.Transformer(d_model=hidden_size, nhead=4, num_encoder_layers=4)
        self.output_layer = nn.Linear(hidden_size, action_size)
    
    def forward(self, combined_embeddings):
        """
        combined_embeddings: Tensor of shape (batch_size, combined_size)
        Returns:
            logits: Tensor of shape (batch_size, action_size)
        """
        hidden = torch.relu(self.fc(combined_embeddings))              # (batch_size, hidden_size)
        hidden = hidden.unsqueeze(0)                                    # (1, batch_size, hidden_size) for transformer
        transformer_output = self.transformer(hidden, hidden)           # (1, batch_size, hidden_size)
        transformer_output = transformer_output.squeeze(0)              # (batch_size, hidden_size)
        logits = self.output_layer(transformer_output)                  # (batch_size, action_size)
        return logits

In [48]:
class StockTradingEnv(Env):
    def __init__(self, merged_data, seq_len=10):
        super(StockTradingEnv, self).__init__()
        self.seq_len = seq_len
        self.current_step = 0
        self.done = False

        # Store merged data
        self.data = merged_data.reset_index(drop=True)
        self.total_steps = len(self.data) - self.seq_len

        # Define action and observation space
        self.action_space = Discrete(3)  # Actions: Buy, Hold, Sell
        self.observation_space = Box(
            low=0, high=1, shape=(self.seq_len, 4), dtype=np.float32
        )  # [Close, SMA, RSI, sentiment]
    
        # Initialize state
        self.state = self._next_observation()

    def _next_observation(self):
        if self.current_step + self.seq_len <= len(self.data):
            obs = self.data.iloc[self.current_step:self.current_step + self.seq_len][
                ['Close', 'SMA', 'RSI', 'sentiment']
            ].to_numpy()
            return obs
        else:
            return np.zeros((self.seq_len, 4))  # Zero array if out of bounds

    def step(self, action):
        self.current_step += 1
        if self.current_step >= self.total_steps:
            self.done = True
        else:
            self.done = False

        reward = 0
        if action == 0:  # Buy
            reward = self.data.iloc[self.current_step]['Close'] - self.data.iloc[self.current_step - 1]['Close']
        elif action == 2:  # Sell
            reward = self.data.iloc[self.current_step - 1]['Close'] - self.data.iloc[self.current_step]['Close']

        self.state = self._next_observation()
        return self.state, reward, self.done, {}

    def reset(self):
        self.current_step = 0
        self.done = False
        return self._next_observation()


In [49]:
# Initialize the environment with merged data
env = StockTradingEnv(merged_data=merged_data, seq_len=10)
vec_env = DummyVecEnv([lambda: env])



In [29]:
# 8. Training and Evaluating the Model
# 8.1. Initializing Transformers and Decision Transformer
# Define feature sizes
stock_input_size = 4          # [Close, SMA, RSI, sentiment]
stock_hidden_size = 128
news_hidden_size = 128
combined_size = 256
action_size = env.action_space.n  # 3 actions

# Initialize Combined Transformer
combined_transformer = CombinedTransformer(
    stock_input_size=stock_input_size,
    stock_hidden_size=stock_hidden_size,
    news_hidden_size=news_hidden_size,
    combined_size=combined_size
)

# Initialize Decision Transformer
decision_transformer = DecisionTransformerModel(
    combined_size=combined_size,
    action_size=action_size,
    hidden_size=256
)




tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [50]:
# Move models to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
combined_transformer.to(device)
decision_transformer.to(device)

# 8.2. Setting Up Optimizer and Loss Function
# Define optimizer (combining parameters from both transformers)
optimizer = optim.Adam(
    list(combined_transformer.parameters()) + list(decision_transformer.parameters()),
    lr=1e-4
)

# Define loss function
criterion = nn.CrossEntropyLoss()

# 8.3. Training Loop
num_episodes = 5
seq_len = 10
batch_size = 32 

In [33]:
!pip install tqdm



In [None]:
# Import necessary modules
from tqdm.notebook import tqdm
import time
import logging
from torch.utils.tensorboard import SummaryWriter

# Setup logging
logging.basicConfig(filename='training_logs.txt', level=logging.INFO, format='%(asctime)s - %(message)s')

# Initialize TensorBoard writer
writer = SummaryWriter('runs/training_experiment')

# Define hyperparameters
num_episodes = 100
seq_len = 10

# Initialize lists to store rewards and losses for analysis
episode_rewards = []
episode_losses = []

# Record the start time of the training
training_start_time = time.time()

# Create a tqdm progress bar for episodes
for episode in tqdm(range(num_episodes), desc="Training Episodes"):
    # Record the start time of the episode
    episode_start_time = time.time()
    
    state = env.reset()  # state shape: (seq_len, 4)
    total_reward = 0
    episode_loss = 0  # To accumulate loss over steps in the episode
    
    # Iterate over each step within the episode
    for step in range(env.total_steps):
        # Convert state to tensor and move to device
        state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device)  # (1, seq_len, 4)
        
        # Prepare news data
        news_text = merged_data.iloc[env.current_step]['content']
        news_texts = [news_text]  # Batch size 1
        
        # Get combined embeddings
        combined_embeddings = combined_transformer(state_tensor, news_texts)  # (1, combined_size)
        
        # Get action logits from Decision Transformer
        logits = decision_transformer(combined_embeddings)  # (1, action_size)
        
        # Choose action with highest probability
        action = torch.argmax(logits, dim=1).item()
        
        # Take action in the environment
        next_state, reward, done, _ = env.step(action)
        total_reward += reward
        
        # Convert action to tensor
        action_tensor = torch.tensor([action]).to(device)
        
        # Compute loss
        loss = criterion(logits, action_tensor)
        episode_loss += loss.item()
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if done:
            break
        
        # Update state
        state = next_state
    
    # Record the episode's total reward and average loss
    episode_rewards.append(total_reward)
    average_loss = episode_loss / (step + 1)
    episode_losses.append(average_loss)
    
    # Calculate the time taken for the episode
    episode_end_time = time.time()
    episode_duration = episode_end_time - episode_start_time
    
    # Log metrics
    logging.info(f"Episode {episode+1}/{num_episodes}, Total Reward: {total_reward:.2f}, Avg Loss: {average_loss:.4f}, Time: {episode_duration:.2f}s")
    writer.add_scalar('Total Reward', total_reward, episode)
    writer.add_scalar('Average Loss', average_loss, episode)
    
    # Update the tqdm description with current episode info
    tqdm.write(f"Episode {episode+1}/{num_episodes}, Total Reward: {total_reward:.2f}, Avg Loss: {average_loss:.4f}, Time: {episode_duration:.2f}s")
    
    # Save model checkpoints every 10 episodes
    if (episode + 1) % 10 == 0:
        torch.save(combined_transformer.state_dict(), f'combined_transformer_epoch_{episode+1}.pth')
        torch.save(decision_transformer.state_dict(), f'decision_transformer_epoch_{episode+1}.pth')
        tqdm.write(f"Saved model checkpoints at Episode {episode+1}")

# Record the end time of the training
training_end_time = time.time()
total_training_time = training_end_time - training_start_time

print(f"\nTraining completed in {total_training_time/60:.2f} minutes.")

# Close the TensorBoard writer
writer.close()

In [None]:
# 9. Evaluating the Model
state = env.reset()
predicted_actions = []
true_labels = []
total_reward = 0

for step in range(env.total_steps):
    # Convert state to tensor and move to device
    state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device)  # (1, seq_len, 4)
    
    # Prepare news data
    news_text = merged_data.iloc[env.current_step]['content']
    news_texts = [news_text]  # Batch size 1
    
    # Get combined embeddings
    combined_embeddings = combined_transformer(state_tensor, news_texts)  # (1, combined_size)
    
    # Get action logits from Decision Transformer
    logits = decision_transformer(combined_embeddings)  # (1, action_size)
    
    # Choose action with highest probability
    action = torch.argmax(logits, dim=1).item()
    predicted_actions.append(action)
    
    # Determine true action based on price movement
    current_close = merged_data.iloc[env.current_step]['Close']
    previous_close = merged_data.iloc[env.current_step - 1]['Close']
    if current_close > previous_close:
        true_labels.append(0)  # Buy
    else:
        true_labels.append(2)  # Sell
    
    # Take action in the environment
    next_state, reward, done, _ = env.step(action)
    total_reward += reward
    
    if done:
        break
    
    # Update state
    state = next_state

# Calculate evaluation metrics
accuracy = accuracy_score(true_labels, predicted_actions)
f1 = f1_score(true_labels, predicted_actions, average='weighted')
cm = confusion_matrix(true_labels, predicted_actions)

print(f"\nEvaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Confusion Matrix:\n{cm}")

# 10. Visualization of Results
# Plotting Buy and Sell signals
plt.figure(figsize=(12, 6))
plt.plot(merged_data['Date'], merged_data['Close'], label="Close Price", alpha=0.6)

# Identify Buy and Sell signals
buy_signals = [i for i, action in enumerate(predicted_actions) if action == 0]
sell_signals = [i for i, action in enumerate(predicted_actions) if action == 2]

# Adjust indices for plotting (add seq_len offset)
buy_signals = [i + seq_len for i in buy_signals]
sell_signals = [i + seq_len for i in sell_signals]

# Ensure indices are within bounds
buy_signals = [i for i in buy_signals if i < len(merged_data)]
sell_signals = [i for i in sell_signals if i < len(merged_data)]

# Plot Buy and Sell signals
plt.scatter(merged_data['Date'].iloc[buy_signals], merged_data['Close'].iloc[buy_signals], color='green', label='Buy Signal', marker='^', alpha=0.7)
plt.scatter(merged_data['Date'].iloc[sell_signals], merged_data['Close'].iloc[sell_signals], color='red', label='Sell Signal', marker='v', alpha=0.7)

# Customize the plot
plt.title("Buy and Sell Signals Based on Combined Transformers and Decision Transformer Model")
plt.xlabel("Date")
plt.ylabel("Stock Price")
plt.legend()
plt.show()